%{ /* C declarations used in actions */ #define stack_size 100 static int sp, stack [stack_size]; static void push (int i) { if (++sp<stack_size) stack[sp]= i; else {printf ("error: stack overflow\n"); exit(1);} } static int pop (void) { if (sp>=0) return stack[sp --]; else {printf ("error: stack underflow\n"); exit(1);} } %} /* Lex definitions - not used in this example */ %% /* descriptions of corresponding actions expected inputs (C statements or blocks) */ [0-9]+ {push(atoi(yytext));} "+" {push(pop()+pop());} "*" {push(pop()*pop());} - {int rhs=pop(); push(pop()-rhs);} "/" {int rhs=pop(); push(pop()/rhs);} ; {printf("%d\n", pop());} [ \t\n] ; [^-0-9+*/; \t\n]+ {ECHO; printf(" unexpected\n");} %% C code int main (void) { sp=-1; yylex(); return 0; } static int yywrap (void) {return 1;}
characters | those characters (special characters must be in " ") | |
or | while or "while" |
(the keyword) while |
" characters" |
"+" |
(the operator) + |
\ character |
\n \t \\ |
newline tab etc. as for C |
otherwise, character is an alternative to "character" | ||
\+ |
+ | |
^ and $ |
the start and the end of line respectively | |
^aardvark$ |
a line just containing aardvark |
|
[ characters] |
any one character in the set of characters | |
[AEIOUaeiou] |
any one vowel | |
[A-Za-z] |
any one letter | |
[^ characters] |
any one character not in the set of characters | |
[^\n] |
any character except a newline | |
. |
any one character except a newline | |
+ |
1 or more times | |
* |
0 or 1 or more times | |
{ n, m} |
n to m times | |
[0-9]+ |
an unsigned integer | |
[A-Z][a-z]* |
a capitalised name | |
[A-Za-z][A-Za-z0-9_]* |
a C identifier | |
a{2,3} |
aa or aaa | |
| |
separates alternatives | |
WHILE|while |
either WHILE or while | |
( any sub-pattern) |
that sub-pattern | |
([A-Z][A-Z])+ |
an even number of upper-case letters | |
pattern? |
optional (but only the immediately preceding pattern!) | |
ab?c |
ac or abc | |
/ pattern |
only if followed by the pattern | |
ab/c |
ab, but only if followed by c | |
ab/\n |
the same as ab$ |
Rule Priority: the first rule, matching the longest possible string.
e.g. put error/default rules at end
- , ; |
the character itself |
"+" , "*" , "/" |
special characters have to be quoted to just recognise them |
[ \t\n] |
white space (ignored) |
[0-9]+ |
one or more numeric characters (a number) |
[^-0-9+*/; \t\n]+ |
any other characters |
e.g.
ALPHANUMERIC [A-Za-z0-9] ALPHABETIC [A-Za-z] %% {ALPHABETIC}{ALPHANUMERIC}*recognises identifiers
yytext: contains the actual characters recognised from input
yyleng: the number of characters in yytext
ECHO = printf ("%s", yytext): the default action
yylex: routine created by Lex from (expected input, action) lists.
yywrap: called at EOF; (boolean) result = terminate or not
Lex deals with EOF (i.e. End-Of-File e.g. control-D) as if with the following
rule:
EOF if (yywrap( )) return 0;
yywrap is useful during file inclusion, to return to the original file at
the end of the included file.
flex : infix.l infix.c
gcc : infix.c infix
infix : equations answers
Lex has other facilities, but those described above are the most important.
Refer to the Lex manual in the departmental library or at URL
http://www.cs.man.ac.uk/~pjj/cs2111/lex/lex.html
if necessary.
#include <stdio.h> int main (void) { printf ("hello world\n"); return 0; cauliflower; "cauliflower"; /*cauliflower*/ /* if I do this then do you get it wrong? */ "if I do this then do you get it wrong?"; {static float i_1234 = 1 + 2.0 * 3 / .4 - 'i';} if (I_do_this) /*then*/ do_you(get+it-wrong)?or:not; else this(is_1.load,of->old*whatsit); does<this>>iffy*=expression%cause||problems&&elsewhere; "a string"; not-a-string; "another string"; /* these are not valid operators */ a#b; a@b; /* nor are these - recognise the individual operators they consist of */ a:=b; a<>b; a**b; a~~b; a^^b; /* if you can do all the above, try the following: */ 012345; 12345; 0x12345abc; 012345LU; 12345l; 0x12345abcu; 123.; 123.123; 123.e-1; 123.123E99; 123e+23; 123.e-1f; 123.123E99l; 123e+23F; 123L; 'a'; '\n'; '\''; '\"'; '\\'; '\0'; '\01'; '\012'; '\xAB'; /* a comment */ not-a-comment; /* another comment */ /* a nasty * / * /* multi-line comment */ "a nasty \" string"; #define a(nasty)\ multi-line command /* if you can do all the above, try bigdata.c */ }
Use any available ANSI C documentation. If you don't have anything with you, "Standard C - A Reference" by Plauger and Brodie is at URL http://www.cs.man.ac.uk/applhax/C/standard_c/index.html
Why does ANSI C only include unsigned numbers in its syntax, rather than signed numbers?