index
/************************************************************
* file: Lexer.cpp
* date: 2006-03-31
* author: ideawu
* describe: Lexer implementation; scans source text into Tokens
*           (identifiers, numbers, reserved words and operators).
*************************************************************/
  7
  8  #include "Lexer.h"
  9  #include <stdio.h>
 10  #include <string.h>
 11
 12  #define MAXRESERVED 7
 13  #define TOKENBUFSIZE 64
 14
 15  void getSingleOperator(char c, Token &token);
 16  void keywordLookup(Token &token);
 17
 18
 19  static Token ReservedWords[MAXRESERVED] = {
 20      {IF, "if"},
 21      {THEN, "then"},
 22      {ELSE, "else"},
 23      {WHILE, "while"},
 24      {DO, "do"},
 25      {BEGIN, "begin"},
 26      {END, "end"}
 27  };
 28
 29
 30  Lexer::Lexer(char *filename){
 31      buf = new char[TOKENBUFSIZE];
 32      FILE *fp = fopen(filename, "r");
 33      index = 0;
 34      if(fp==NULL){
 35          src = NULL;
 36          printf("\n\n********************************************\n");
 37          printf("* FATAL ERROR! LEXER COULD NOT OPEN FILE!!!\n");
 38          printf("* %s : No such file.\n", filename);
 39          printf("********************************************\n\n");
 40          length = 0;
 41      }else{
 42          int i = 0;
 43          while(fgetc(fp)!=EOF){
 44              i++;
 45          }
 46          length = i;
 47          src = new char[i+1];
 48
 49          rewind(fp);
 50          i = 0;
 51          while(src[i] = fgetc(fp)){
 52              if(src[i] == EOF){
 53                  src[i] = '\0';
 54                  break;
 55              }
 56              i++;
 57          }
 58
 59          fclose(fp);
 60      }
 61  }
 62
 63  Lexer::Lexer(){
 64  }
 65
 66  Lexer::~Lexer(){
 67      delete[] src;
 68  }
 69
 70  void Lexer::reset(){
 71      index = 0;
 72  }
 73
 74  bool Lexer::isFinished(){
 75      return (index == length - 1);
 76  }
 77
 78  bool Lexer::isReady(){
 79      return (src != NULL);
 80  }
 81
 82  char* Lexer::getSrc(){
 83      return src;
 84  }
 85
 86  void Lexer::setSrc(char *s, int len){
 87      src = s;
 88      index = 0;
 89      length = len;
 90      buf = new char[TOKENBUFSIZE];
 91  }
 92
 93  int  Lexer::getIndex(){
 94      return index;
 95  }
 96
 97
 98  /***=======================================================****/
 99
100
101  Token Lexer::nextToken(){
102      Token token;
103      ScannerState state = START;
104      int bufindex = 0;
105      bool next = true;   // index++
106      char c;
107
108      if(index==length-1){
109          token.type = ERROR;
110          token.name = "NO CHAR LEFT.";
111          return token;
112      }
113
114      c = src[index];
115      while(c==' ' || c=='\n' || c=='\r' || c=='\t'){
116          index ++;
117          c = src[index];
118      }
119
120      // get started
121      if((c>='a' && c<='z') || (c>='A' && c<='Z')){
122          state = INID;
123          token.type = ID;
124          buf[bufindex++] = c;
125      }else if(c>='0' && c<='9'){
126          state = INNUM;
127          token.type = NUM;
128          buf[bufindex++] = c;
129      }else if(c=='='){
130          state = INEQ;
131      }else if(c=='<'){
132          state = INLE;
133      }else if(c=='>'){
134          state = INGE;
135      }else{
136          state = DONE;
137          getSingleOperator(c, token);
138      }
139      index ++;
140
141      while(state!=DONE){
142          c = src[index];
143          switch(state){
144              case INEQ:
145                  if(c=='='){
146                      token.type = EQ;
147                      token.name = "==";
148                  }else{
149                      token.type = ASSIGN;
150                      token.name = "=";
151                      next = false;
152                  }
153                  state = DONE;
154                  break;
155              case INLE:
156                  if(c=='='){
157                      token.type = LE;
158                      token.name = "<=";
159                  }else{
160                      token.type = LT;
161                      token.name = "<";
162                      next = false;
163                  }
164                  state = DONE;
165                  break;
166              case INGE:
167                  if(c=='='){
168                      token.type = GE;
169                      token.name = ">=";
170                  }else{
171                      token.type = GT;
172                      token.name = ">";
173                      next = false;
174                  }
175                  state = DONE;
176                  break;
177              case INID:
178                  if((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9')){
179                      buf[bufindex++] = c;
180                  }else{
181                      state = DONE;
182                      next = false;
183                  }
184                  break;
185              case INNUM:
186                  if(c>='0' && c<='9'){
187                      buf[bufindex++] = c;
188                  }else{
189                      state = DONE;
190                      next = false;
191                  }
192                  break;
193              default:
194                  state = DONE;
195                  token.type = ERROR;
196                  token.name = "ERROR!";
197                  printf("Error! Because no state is define! This should never happen! \
198                          Current character is: %c\n", src[index]);
199                  break;
200          }// end scanner state
201          index ++;
202      }// end while
203
204      if(next==false){
205          index --;
206      }
207
208      if(bufindex == TOKENBUFSIZE - 1){
209          // OUT OF BUFFER! It should never happen.
210          token.type = ERROR;
211          token.name = "OUT OF BUFFER!";
212      }
213
214      if(bufindex!=0){
215          buf[bufindex] = '\0';
216          token.name = new char[bufindex];
217          strcpy(token.name, buf);
218          if(token.type==ID){ // looking for reserved word and set the right type
219              keywordLookup(token);
220          }
221      }
222
223      return token;
224  }
225
226
227  // looking for reserved word and set the right type
228  void keywordLookup(Token &token){
229      for(int i=0;i<MAXRESERVED;i++){
230          if(strcmp(token.name, ReservedWords[i].name)==0){
231              token.type = ReservedWords[i].type;
232              if(token.type == BEGIN)
233                  token.name = "{";
234              if(token.type == END)
235                  token.name = "}";
236              break;
237          }
238      }
239  }
240
241
242  void getSingleOperator(char c, Token &token){
243      switch(c){
244          case '{':
245              token.type = BEGIN;
246              token.name = "{";
247              break;
248          case '}':
249              token.type = END;
250              token.name = "}";
251              break;
252          case '+':
253              token.type = PLUS;
254              token.name = "+";
255              break;
256          case '-':
257              token.type = MINUS;
258              token.name = "-";
259              break;
260          case '*':
261              token.type = MUL;
262              token.name = "*";
263              break;
264          case '/':
265              token.type = DIV;
266              token.name = "/";
267              break;
268          case '#':
269          case '\0':  // end of file
270              token.type = LEXER_DONE;
271              token.name = "FINISH";
272              break;
273          case '&':
274              token.type = AND;
275              token.name = "&";
276              break;
277          case '|':
278              token.type = OR;
279              token.name = "|";
280              break;
281          case ';':
282              token.type = SEMI;
283              token.name = ";";
284              break;
285          default:
286              token.type = ERROR;
287              token.name = "ERROR! Unknown character.";
288              printf("Error occured when state=START! Current character is: %c\n", c);
289              break;
290      }
291  }
292
293
294