00001 // scanner.cpp 00002 // 00003 // Copyright (C) 2001 Chris Laurel <claurel@shatters.net> 00004 // 00005 // This program is free software; you can redistribute it and/or 00006 // modify it under the terms of the GNU General Public License 00007 // as published by the Free Software Foundation; either version 2 00008 // of the License, or (at your option) any later version. 00009 00010 #include <cctype> 00011 #include <cmath> 00012 #include <iomanip> 00013 #include <celscript/scanner.h> 00014 00015 using namespace std; 00016 using namespace celx; 00017 00018 00019 static bool issep(char c) 00020 { 00021 return !isdigit(c) && !isalpha(c) && c != '.'; 00022 } 00023 00024 00025 Scanner::Scanner(istream* _in) : 00026 in(_in), 00027 tokenType(TokenBegin), 00028 haveValidNumber(false), 00029 haveValidName(false), 00030 haveValidString(false), 00031 pushedBack(false) 00032 { 00033 } 00034 00035 00036 Scanner::TokenType Scanner::nextToken() 00037 { 00038 State state = StartState; 00039 00040 if (pushedBack) 00041 { 00042 pushedBack = false; 00043 return tokenType; 00044 } 00045 00046 textToken = ""; 00047 haveValidNumber = false; 00048 haveValidName = false; 00049 haveValidString = false; 00050 00051 if (tokenType == TokenBegin) 00052 { 00053 nextChar = readChar(); 00054 if (in->eof()) 00055 return TokenEnd; 00056 } 00057 else if (tokenType == TokenEnd) 00058 { 00059 return tokenType; 00060 } 00061 00062 double integerValue = 0; 00063 double fractionValue = 0; 00064 double sign = 1; 00065 double fracExp = 1; 00066 double exponentValue = 0; 00067 double exponentSign = 1; 00068 00069 TokenType newToken = TokenBegin; 00070 while (newToken == TokenBegin) 00071 { 00072 switch (state) 00073 { 00074 case StartState: 00075 if (isspace(nextChar)) 00076 { 00077 state = StartState; 00078 } 00079 else if (isdigit(nextChar)) 00080 { 00081 state = NumberState; 00082 integerValue = (int) nextChar - (int) '0'; 00083 } 00084 else if (nextChar == '(') 00085 { 00086 newToken = TokenOpen; 00087 nextChar = readChar(); 00088 } 00089 else if (nextChar == ')') 00090 { 00091 newToken = TokenClose; 00092 nextChar = readChar(); 00093 } 00094 else if (nextChar == ',') 00095 { 00096 newToken = TokenComma; 00097 nextChar = readChar(); 00098 } 00099 else if (nextChar == '+') 00100 { 00101 state = PlusState; 00102 } 00103 else if (nextChar == '-') 00104 { 00105 state = MinusState; 00106 } 00107 else if (nextChar == '*') 00108 { 00109 state = AsteriskState; 00110 } 00111 else if (nextChar == '/') 00112 { 00113 state = SlashState; 00114 } 00115 else if (isalpha(nextChar)) 00116 { 00117 state = NameState; 00118 textToken += (char) nextChar; 00119 } 00120 else if (nextChar == '#') 00121 { 00122 state = CommentState; 00123 } 00124 else if (nextChar == '"') 00125 { 00126 state = StringState; 00127 } 00128 else if (nextChar == ';') 00129 { 00130 newToken = TokenEndStatement; 00131 nextChar = readChar(); 00132 } 00133 else if (nextChar == '{') 00134 { 00135 newToken = TokenBeginGroup; 00136 nextChar = readChar(); 00137 } 00138 else if (nextChar == '}') 00139 { 00140 newToken = TokenEndGroup; 00141 nextChar = readChar(); 00142 } 00143 else if (nextChar == '[') 00144 { 00145 newToken = TokenBeginArray; 00146 nextChar = readChar(); 00147 } 00148 else if (nextChar == ']') 00149 { 00150 newToken = TokenEndArray; 00151 nextChar = readChar(); 00152 } 00153 else if (nextChar == '=') 00154 { 00155 state = EqualState; 00156 } 00157 else if (nextChar == '<') 00158 { 00159 state = LessState; 00160 } 00161 else if (nextChar == '>') 00162 { 00163 state = GreaterState; 00164 } 00165 else if (nextChar == '!') 00166 { 00167 state = BangState; 00168 } 00169 else if (nextChar == '|') 00170 { 00171 newToken = TokenBar; 00172 nextChar = readChar(); 00173 } 00174 else if (nextChar == -1) 00175 { 00176 newToken = TokenEnd; 00177 } 00178 else 00179 { 00180 newToken = TokenError; 00181 syntaxError("Bad character in stream"); 00182 } 00183 break; 00184 00185 case NameState: 00186 if (isalpha(nextChar) || isdigit(nextChar)) 00187 { 00188 state = NameState; 00189 textToken += (char) nextChar; 00190 } 00191 else 00192 { 00193 if (textToken == "for") 00194 newToken = KeywordFor; 00195 else if (textToken == "while") 00196 newToken = KeywordWhile; 00197 else if (textToken == "if") 00198 newToken = KeywordIf; 00199 else if (textToken == "else") 00200 newToken = KeywordElse; 00201 else if (textToken == "var") 00202 newToken = KeywordVar; 00203 else if (textToken == "return") 00204 newToken = KeywordReturn; 00205 else if (textToken == "function") 00206 newToken = KeywordFunction; 00207 else if (textToken == "lambda") 00208 newToken = KeywordLambda; 00209 else if (textToken == "null") 00210 newToken = KeywordNull; 00211 else if (textToken == "true") 00212 newToken = KeywordTrue; 00213 else if (textToken == "false") 00214 newToken = KeywordFalse; 00215 else 00216 { 00217 newToken = TokenName; 00218 haveValidName = true; 00219 } 00220 } 00221 break; 00222 00223 case CommentState: 00224 if (nextChar == '\n' || nextChar == '\r') 00225 state = StartState; 00226 break; 00227 00228 case StringState: 00229 if (nextChar == '"') 00230 { 00231 newToken = TokenString; 00232 haveValidString = true; 00233 nextChar = readChar(); 00234 } 00235 else if (nextChar == '\\') 00236 { 00237 state = StringEscapeState; 00238 } 00239 else 00240 { 00241 state = StringState; 00242 textToken += (char) nextChar; 00243 } 00244 break; 00245 00246 case StringEscapeState: 00247 if (nextChar == '\\') 00248 { 00249 textToken += '\\'; 00250 } 00251 else if (nextChar == 'n') 00252 { 00253 textToken += '\n'; 00254 } 00255 else if (nextChar == '"') 00256 { 00257 textToken += '"'; 00258 } 00259 else 00260 { 00261 newToken = TokenError; 00262 syntaxError("Unknown escape code in string"); 00263 } 00264 state = StringState; 00265 break; 00266 00267 case MinusState: 00268 newToken = TokenMinus; 00269 state = StartState; 00270 break; 00271 00272 case PlusState: 00273 newToken = TokenPlus; 00274 state = StartState; 00275 break; 00276 00277 case AsteriskState: 00278 newToken = TokenMultiply; 00279 state = StartState; 00280 break; 00281 00282 case SlashState: 00283 newToken = TokenDivide; 00284 state = StartState; 00285 break; 00286 00287 case EqualState: 00288 if (nextChar == '=') 00289 { 00290 newToken = TokenEqual; 00291 nextChar = readChar(); 00292 state = StartState; 00293 } 00294 else 00295 { 00296 newToken = TokenAssign; 00297 state = StartState; 00298 } 00299 break; 00300 00301 case LessState: 00302 if (nextChar == '=') 00303 { 00304 newToken = TokenLesserEqual; 00305 nextChar = readChar(); 00306 state = StartState; 00307 } 00308 else 00309 { 00310 newToken = TokenLesser; 00311 state = StartState; 00312 } 00313 break; 00314 00315 case GreaterState: 00316 if (nextChar == '=') 00317 { 00318 newToken = TokenGreaterEqual; 00319 nextChar = readChar(); 00320 state = StartState; 00321 } 00322 else 00323 { 00324 newToken = TokenGreater; 00325 state = StartState; 00326 } 00327 break; 00328 00329 case BangState: 00330 if (nextChar == '=') 00331 { 00332 newToken = TokenNotEqual; 00333 nextChar = readChar(); 00334 state = StartState; 00335 } 00336 else 00337 { 00338 newToken = TokenNot; 00339 state = StartState; 00340 } 00341 break; 00342 00343 case NumberState: 00344 if (isdigit(nextChar)) 00345 { 00346 state = NumberState; 00347 integerValue = integerValue * 10 + (int) nextChar - (int) '0'; 00348 } 00349 else if (nextChar == '.') 00350 { 00351 state = FractionState; 00352 } 00353 else if (nextChar == 'e' || nextChar == 'E') 00354 { 00355 state = ExponentFirstState; 00356 } 00357 else if (issep(nextChar)) 00358 { 00359 newToken = TokenNumber; 00360 haveValidNumber = true; 00361 } 00362 else 00363 { 00364 newToken = TokenError; 00365 syntaxError("Bad character in number"); 00366 } 00367 break; 00368 00369 case FractionState: 00370 if (isdigit(nextChar)) 00371 { 00372 state = FractionState; 00373 fractionValue = fractionValue * 10 + nextChar - (int) '0'; 00374 fracExp *= 10; 00375 } 00376 else if (nextChar == 'e' || nextChar == 'E') 00377 { 00378 state = ExponentFirstState; 00379 } 00380 else if (issep(nextChar)) 00381 { 00382 newToken = TokenNumber; 00383 haveValidNumber = true; 00384 } else { 00385 newToken = TokenError; 00386 syntaxError("Bad character in number"); 00387 } 00388 break; 00389 00390 case ExponentFirstState: 00391 if (isdigit(nextChar)) 00392 { 00393 state = ExponentState; 00394 exponentValue = (int) nextChar - (int) '0'; 00395 } 00396 else if (nextChar == '-') 00397 { 00398 state = ExponentState; 00399 exponentSign = -1; 00400 } 00401 else if (nextChar == '+') 00402 { 00403 state = ExponentState; 00404 } 00405 else 00406 { 00407 state = ErrorState; 00408 syntaxError("Bad character in number"); 00409 } 00410 break; 00411 00412 case ExponentState: 00413 if (isdigit(nextChar)) 00414 { 00415 state = ExponentState; 00416 exponentValue = exponentValue * 10 + (int) nextChar - (int) '0'; 00417 } 00418 else if (issep(nextChar)) 00419 { 00420 newToken = TokenNumber; 00421 haveValidNumber = true; 00422 } 00423 else 00424 { 00425 state = ErrorState; 00426 syntaxError("Bad character in number"); 00427 } 00428 break; 00429 00430 case DotState: 00431 if (isdigit(nextChar)) 00432 { 00433 state = FractionState; 00434 fractionValue = fractionValue * 10 + (int) nextChar - (int) '0'; 00435 fracExp = 10; 00436 } 00437 else 00438 { 00439 state = ErrorState; 00440 syntaxError("'.' in stupid place"); 00441 } 00442 break; 00443 } 00444 00445 if (newToken == TokenBegin) 00446 { 00447 nextChar = readChar(); 00448 } 00449 } 00450 00451 tokenType = newToken; 00452 if (haveValidNumber) 00453 { 00454 numberValue = integerValue + fractionValue / fracExp; 00455 if (exponentValue != 0) 00456 numberValue *= pow(10, exponentValue * exponentSign); 00457 numberValue *= sign; 00458 } 00459 00460 return tokenType; 00461 } 00462 00463 00464 Scanner::TokenType Scanner::getTokenType() 00465 { 00466 return tokenType; 00467 } 00468 00469 00470 void Scanner::pushBack() 00471 { 00472 pushedBack = true; 00473 } 00474 00475 00476 double Scanner::getNumberValue() 00477 { 00478 return numberValue; 00479 } 00480 00481 00482 string Scanner::getNameValue() 00483 { 00484 return textToken; 00485 } 00486 00487 00488 string Scanner::getStringValue() 00489 { 00490 return textToken; 00491 } 00492 00493 00494 int Scanner::readChar() 00495 { 00496 return (int) in->get(); 00497 } 00498 00499 void Scanner::syntaxError(char* message) 00500 { 00501 cerr << message << '\n'; 00502 } 00503 00504 00505 int Scanner::getLineNumber() 00506 { 00507 return 0; 00508 } 00509 00510 #if 0 00511 // Scanner test 00512 int main(int argc, char *argv[]) 00513 { 00514 Scanner scanner(&cin); 00515 Scanner::TokenType tok = Scanner::TokenBegin; 00516 00517 while (tok != Scanner::TokenEnd && tok != Scanner::TokenError) 00518 { 00519 tok = scanner.nextToken(); 00520 switch (tok) 00521 { 00522 case Scanner::TokenBegin: 00523 cout << "Begin"; 00524 break; 00525 case Scanner::TokenEnd: 00526 cout << "End"; 00527 break; 00528 case Scanner::TokenName: 00529 cout << "Name = " << scanner.getNameValue(); 00530 break; 00531 case Scanner::TokenNumber: 00532 cout << "Number = " << scanner.getNumberValue(); 00533 break; 00534 case Scanner::TokenString: 00535 cout << "String = " << '"' << scanner.getStringValue() << '"'; 00536 break; 00537 case Scanner::TokenOpen: 00538 cout << '('; 00539 break; 00540 case Scanner::TokenClose: 00541 cout << ')'; 00542 break; 00543 case Scanner::TokenBeginGroup: 00544 cout << '{'; 00545 break; 00546 case Scanner::TokenEndGroup: 00547 cout << '}'; 00548 break; 00549 case Scanner::TokenEqual: 00550 cout << "=="; 00551 break; 00552 case Scanner::TokenAssign: 00553 cout << '='; 00554 break; 00555 case Scanner::TokenPlus: 00556 cout << '+'; 00557 break; 00558 case Scanner::TokenMinus: 00559 cout << '-'; 00560 break; 00561 case Scanner::TokenMultiply: 00562 cout << '*'; 00563 break; 00564 case Scanner::TokenEndStatement: 00565 cout << ';'; 00566 break; 00567 case Scanner::TokenDivide: 00568 cout << '/'; 00569 break; 00570 default: 00571 cout << "Other"; 00572 break; 00573 } 00574 00575 cout << '\n'; 00576 } 00577 00578 return 0; 00579 } 00580 #endif
1.4.1