Answers to CS5031 Exercises 2: Lexical Analysis for C

1. This is what my version of the program did:

#include <stdio.h>				preprocessor command
int						built-in type
main						identifier
(						punctuation
void						keyword
)						punctuation
{						punctuation
printf						identifier
(						punctuation
"hello world\n"					string
)						punctuation
;						punctuation
return						keyword
0						octal integer number
;						punctuation
cauliflower					identifier
;						punctuation
"cauliflower"					string
;						punctuation
/*cauliflower*/					comment
/* if I do this then do you get it wrong? */	comment
"if I do this then do you get it wrong?"	string
;						punctuation
answer						identifier
=						operator(s)
1						decimal integer number
+						operator(s)
2.0						floating point number
*						operator(s)
3						decimal integer number
/						operator(s)
4						decimal integer number
-						operator(s)
'i'						character
;						punctuation
if						keyword
(						punctuation
I_do_this					identifier
)						punctuation
/*then*/					comment
do_you						identifier
(						punctuation
get						identifier
+						operator(s)
it						identifier
-						operator(s)
wrong						identifier
)						punctuation
?						operator(s)
or						identifier
:						punctuation
not						identifier
;						punctuation
else						keyword
this_is						identifier
(						punctuation
a						identifier
.						operator(s)
load						identifier
,						punctuation
of						identifier
->						operator(s)
old						identifier
*						operator(s)
whatsit						identifier
)						punctuation
;						punctuation
does						identifier
<						operator(s)
this						identifier
>>						operator(s)
iffy						identifier
*=						operator(s)
expression					identifier
%						operator(s)
cause						identifier
|						operator(s)
problems					identifier
&&						operator(s)
elsewhere					identifier
;						punctuation
"a string"					string
;						punctuation
not						identifier
-						operator(s)
a						identifier
-						operator(s)
string						identifier
;						punctuation
"another string"				string
;						punctuation
/* these are not valid operators */		comment
a						identifier
#						????????
b						identifier
;						punctuation
a						identifier
@						????????
b						identifier
;						punctuation
/* just recognise the individual operators these are made up from */	comment
a						identifier
:						punctuation
=						operator(s)
b						identifier
;						punctuation
a						identifier
<						operator(s)
>						operator(s)
b						identifier
;						punctuation
a						identifier
*						operator(s)
*						operator(s)
b						identifier
;						punctuation
a						identifier
~						operator(s)
~						operator(s)
b						identifier
;						punctuation
a						identifier
^						operator(s)
^						operator(s)
b						identifier
;						punctuation
/* if you can do all the above, try "bigdata.c", or the following: */	comment
012345						octal integer number
;						punctuation
12345						decimal integer number
;						punctuation
0x12345abc					hexadecimal integer number
;						punctuation
012345LU					octal integer number
;						punctuation
12345l						decimal integer number
;						punctuation
0x12345abcu					hexadecimal integer number
;						punctuation
123.						floating point number
;						punctuation
123.123						floating point number
;						punctuation
123.e-1						floating point number
;						punctuation
123.123E99					floating point number
;						punctuation
123e+23						floating point number
;						punctuation
123.e-1f					floating point number
;						punctuation
123.123E99l					floating point number
;						punctuation
123e+23F					floating point number
;						punctuation
123L						decimal integer number
;						punctuation
'a'						character
;						punctuation
'\n'						character
;						punctuation
'\''						character
;						punctuation
'\"'						character
;						punctuation
'\\'						character
;						punctuation
'\0'						character
;						punctuation
'\01'						character
;						punctuation
'\012'						character
;						punctuation
'\xAB'						character
;						punctuation
/* a comment */					comment
not						identifier
-						operator(s)
a						identifier
-						operator(s)
comment						identifier
;						punctuation
/* another comment */				comment
}						punctuation

2. This is a minor point, but you may find it interesting. The syntax of ANSI C includes only unsigned numeric constants, not signed ones, because otherwise the lexical analyser could not tell the difference between

	a= b - 1
and
	a= -1
i.e. the latter expression consists of the operator "negate" acting on the integer value "1". This causes irritating problems if you try to write the most negative number possible, e.g. on a machine with 16-bit ints:
	a= -32768
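as this is "negate" acting on "32768", which is one larger than the biggest 16-bit signed int (32767) and so probably causes an overflow at compile-time! Instead, you have to write
	a= 0x8000
or, more portably (this is how <limits.h> usually defines INT_MIN),
	a= -32767 - 1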