index
1 /************************************************************
2 * file: Lexer.cpp
3 * date: 2006-03-31
4 * author: ideawu
5 * describe: none;
6 *************************************************************/
7
8 #include "Lexer.h"
9 #include <stdio.h>
10 #include <string.h>
11
12 #define MAXRESERVED 7
13 #define TOKENBUFSIZE 64
14
15 void getSingleOperator(char c, Token &token);
16 void keywordLookup(Token &token);
17
18
19 static Token ReservedWords[MAXRESERVED] = {
20 {IF, "if"},
21 {THEN, "then"},
22 {ELSE, "else"},
23 {WHILE, "while"},
24 {DO, "do"},
25 {BEGIN, "begin"},
26 {END, "end"}
27 };
28
29
30 Lexer::Lexer(char *filename){
31 buf = new char[TOKENBUFSIZE];
32 FILE *fp = fopen(filename, "r");
33 index = 0;
34 if(fp==NULL){
35 src = NULL;
36 printf("\n\n********************************************\n");
37 printf("* FATAL ERROR! LEXER COULD NOT OPEN FILE!!!\n");
38 printf("* %s : No such file.\n", filename);
39 printf("********************************************\n\n");
40 length = 0;
41 }else{
42 int i = 0;
43 while(fgetc(fp)!=EOF){
44 i++;
45 }
46 length = i;
47 src = new char[i+1];
48
49 rewind(fp);
50 i = 0;
51 while(src[i] = fgetc(fp)){
52 if(src[i] == EOF){
53 src[i] = '\0';
54 break;
55 }
56 i++;
57 }
58
59 fclose(fp);
60 }
61 }
62
63 Lexer::Lexer(){
64 }
65
66 Lexer::~Lexer(){
67 delete[] src;
68 }
69
70 void Lexer::reset(){
71 index = 0;
72 }
73
74 bool Lexer::isFinished(){
75 return (index == length - 1);
76 }
77
78 bool Lexer::isReady(){
79 return (src != NULL);
80 }
81
82 char* Lexer::getSrc(){
83 return src;
84 }
85
86 void Lexer::setSrc(char *s, int len){
87 src = s;
88 index = 0;
89 length = len;
90 buf = new char[TOKENBUFSIZE];
91 }
92
93 int Lexer::getIndex(){
94 return index;
95 }
96
97
98 /***=======================================================****/
99
100
101 Token Lexer::nextToken(){
102 Token token;
103 ScannerState state = START;
104 int bufindex = 0;
105 bool next = true; // index++
106 char c;
107
108 if(index==length-1){
109 token.type = ERROR;
110 token.name = "NO CHAR LEFT.";
111 return token;
112 }
113
114 c = src[index];
115 while(c==' ' || c=='\n' || c=='\r' || c=='\t'){
116 index ++;
117 c = src[index];
118 }
119
120 // get started
121 if((c>='a' && c<='z') || (c>='A' && c<='Z')){
122 state = INID;
123 token.type = ID;
124 buf[bufindex++] = c;
125 }else if(c>='0' && c<='9'){
126 state = INNUM;
127 token.type = NUM;
128 buf[bufindex++] = c;
129 }else if(c=='='){
130 state = INEQ;
131 }else if(c=='<'){
132 state = INLE;
133 }else if(c=='>'){
134 state = INGE;
135 }else{
136 state = DONE;
137 getSingleOperator(c, token);
138 }
139 index ++;
140
141 while(state!=DONE){
142 c = src[index];
143 switch(state){
144 case INEQ:
145 if(c=='='){
146 token.type = EQ;
147 token.name = "==";
148 }else{
149 token.type = ASSIGN;
150 token.name = "=";
151 next = false;
152 }
153 state = DONE;
154 break;
155 case INLE:
156 if(c=='='){
157 token.type = LE;
158 token.name = "<=";
159 }else{
160 token.type = LT;
161 token.name = "<";
162 next = false;
163 }
164 state = DONE;
165 break;
166 case INGE:
167 if(c=='='){
168 token.type = GE;
169 token.name = ">=";
170 }else{
171 token.type = GT;
172 token.name = ">";
173 next = false;
174 }
175 state = DONE;
176 break;
177 case INID:
178 if((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9')){
179 buf[bufindex++] = c;
180 }else{
181 state = DONE;
182 next = false;
183 }
184 break;
185 case INNUM:
186 if(c>='0' && c<='9'){
187 buf[bufindex++] = c;
188 }else{
189 state = DONE;
190 next = false;
191 }
192 break;
193 default:
194 state = DONE;
195 token.type = ERROR;
196 token.name = "ERROR!";
197 printf("Error! Because no state is define! This should never happen! \
198 Current character is: %c\n", src[index]);
199 break;
200 }// end scanner state
201 index ++;
202 }// end while
203
204 if(next==false){
205 index --;
206 }
207
208 if(bufindex == TOKENBUFSIZE - 1){
209 // OUT OF BUFFER! It should never happen.
210 token.type = ERROR;
211 token.name = "OUT OF BUFFER!";
212 }
213
214 if(bufindex!=0){
215 buf[bufindex] = '\0';
216 token.name = new char[bufindex];
217 strcpy(token.name, buf);
218 if(token.type==ID){ // looking for reserved word and set the right type
219 keywordLookup(token);
220 }
221 }
222
223 return token;
224 }
225
226
227 // looking for reserved word and set the right type
228 void keywordLookup(Token &token){
229 for(int i=0;i<MAXRESERVED;i++){
230 if(strcmp(token.name, ReservedWords[i].name)==0){
231 token.type = ReservedWords[i].type;
232 if(token.type == BEGIN)
233 token.name = "{";
234 if(token.type == END)
235 token.name = "}";
236 break;
237 }
238 }
239 }
240
241
242 void getSingleOperator(char c, Token &token){
243 switch(c){
244 case '{':
245 token.type = BEGIN;
246 token.name = "{";
247 break;
248 case '}':
249 token.type = END;
250 token.name = "}";
251 break;
252 case '+':
253 token.type = PLUS;
254 token.name = "+";
255 break;
256 case '-':
257 token.type = MINUS;
258 token.name = "-";
259 break;
260 case '*':
261 token.type = MUL;
262 token.name = "*";
263 break;
264 case '/':
265 token.type = DIV;
266 token.name = "/";
267 break;
268 case '#':
269 case '\0': // end of file
270 token.type = LEXER_DONE;
271 token.name = "FINISH";
272 break;
273 case '&':
274 token.type = AND;
275 token.name = "&";
276 break;
277 case '|':
278 token.type = OR;
279 token.name = "|";
280 break;
281 case ';':
282 token.type = SEMI;
283 token.name = ";";
284 break;
285 default:
286 token.type = ERROR;
287 token.name = "ERROR! Unknown character.";
288 printf("Error occured when state=START! Current character is: %c\n", c);
289 break;
290 }
291 }
292
293
294