Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

scanner.cpp

Go to the documentation of this file.
00001 // scanner.cpp
00002 //
00003 // Copyright (C) 2001 Chris Laurel <claurel@shatters.net>
00004 //
00005 // This program is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU General Public License
00007 // as published by the Free Software Foundation; either version 2
00008 // of the License, or (at your option) any later version.
00009 
00010 #include <cctype>
00011 #include <cmath>
00012 #include <iomanip>
00013 #include <celscript/scanner.h>
00014 
00015 using namespace std;
00016 using namespace celx;
00017 
00018 
// Return true when c may legally terminate a numeric literal, i.e. when it
// is neither a digit, a letter, nor a decimal point.
//
// The character is converted to unsigned char before it is handed to the
// <cctype> classifiers: plain char may be signed, and passing a negative
// value other than EOF to isdigit()/isalpha() is undefined behaviour.
static bool issep(char c)
{
    const unsigned char uc = static_cast<unsigned char>(c);
    return !std::isdigit(uc) && !std::isalpha(uc) && c != '.';
}
00023 
00024 
00025 Scanner::Scanner(istream* _in) :
00026     in(_in),
00027     tokenType(TokenBegin),
00028     haveValidNumber(false),
00029     haveValidName(false),
00030     haveValidString(false),
00031     pushedBack(false)
00032 {
00033 }
00034 
00035 
00036 Scanner::TokenType Scanner::nextToken()
00037 {
00038     State state = StartState;
00039 
00040     if (pushedBack)
00041     {
00042         pushedBack = false;
00043         return tokenType;
00044     }
00045 
00046     textToken = "";
00047     haveValidNumber = false;
00048     haveValidName = false;
00049     haveValidString = false;
00050 
00051     if (tokenType == TokenBegin)
00052     {
00053         nextChar = readChar();
00054         if (in->eof())
00055             return TokenEnd;
00056     }
00057     else if (tokenType == TokenEnd)
00058     {
00059         return tokenType;
00060     }
00061 
00062     double integerValue = 0;
00063     double fractionValue = 0;
00064     double sign = 1;
00065     double fracExp = 1;
00066     double exponentValue = 0;
00067     double exponentSign = 1;
00068 
00069     TokenType newToken = TokenBegin;
00070     while (newToken == TokenBegin)
00071     {
00072         switch (state)
00073         {
00074         case StartState:
00075             if (isspace(nextChar))
00076             {
00077                 state = StartState;
00078             }
00079             else if (isdigit(nextChar))
00080             {
00081                 state = NumberState;
00082                 integerValue = (int) nextChar - (int) '0';
00083             }
00084             else if (nextChar == '(')
00085             {
00086                 newToken = TokenOpen;
00087                 nextChar = readChar();                
00088             }
00089             else if (nextChar == ')')
00090             {
00091                 newToken = TokenClose;
00092                 nextChar = readChar();                
00093             }
00094             else if (nextChar == ',')
00095             {
00096                 newToken = TokenComma;
00097                 nextChar = readChar();                
00098             }
00099             else if (nextChar == '+')
00100             {
00101                 state = PlusState;
00102             }
00103             else if (nextChar == '-')
00104             {
00105                 state = MinusState;
00106             }
00107             else if (nextChar == '*')
00108             {
00109                 state = AsteriskState;
00110             }
00111             else if (nextChar == '/')
00112             {
00113                 state = SlashState;
00114             }
00115             else if (isalpha(nextChar))
00116             {
00117                 state = NameState;
00118                 textToken += (char) nextChar;
00119             }
00120             else if (nextChar == '#')
00121             {
00122                 state = CommentState;
00123             }
00124             else if (nextChar == '"')
00125             {
00126                 state = StringState;
00127             }
00128             else if (nextChar == ';')
00129             {
00130                 newToken = TokenEndStatement;
00131                 nextChar = readChar();
00132             }
00133             else if (nextChar == '{')
00134             {
00135                 newToken = TokenBeginGroup;
00136                 nextChar = readChar();
00137             }
00138             else if (nextChar == '}')
00139             {
00140                 newToken = TokenEndGroup;
00141                 nextChar = readChar();
00142             }
00143             else if (nextChar == '[')
00144             {
00145                 newToken = TokenBeginArray;
00146                 nextChar = readChar();
00147             }
00148             else if (nextChar == ']')
00149             {
00150                 newToken = TokenEndArray;
00151                 nextChar = readChar();
00152             }
00153             else if (nextChar == '=')
00154             {
00155                 state = EqualState;
00156             }
00157             else if (nextChar == '<')
00158             {
00159                 state = LessState;
00160             }
00161             else if (nextChar == '>')
00162             {
00163                 state = GreaterState;
00164             }
00165             else if (nextChar == '!')
00166             {
00167                 state = BangState;
00168             }
00169             else if (nextChar == '|')
00170             {
00171                 newToken = TokenBar;
00172                 nextChar = readChar();
00173             }
00174             else if (nextChar == -1)
00175             {
00176                 newToken = TokenEnd;
00177             }
00178             else
00179             {
00180                 newToken = TokenError;
00181                 syntaxError("Bad character in stream");
00182             }
00183             break;
00184 
00185         case NameState:
00186             if (isalpha(nextChar) || isdigit(nextChar))
00187             {
00188                 state = NameState;
00189                 textToken += (char) nextChar;
00190             }
00191             else
00192             {
00193                 if (textToken == "for")
00194                     newToken = KeywordFor;
00195                 else if (textToken == "while")
00196                     newToken = KeywordWhile;
00197                 else if (textToken == "if")
00198                     newToken = KeywordIf;
00199                 else if (textToken == "else")
00200                     newToken = KeywordElse;
00201                 else if (textToken == "var")
00202                     newToken = KeywordVar;
00203                 else if (textToken == "return")
00204                     newToken = KeywordReturn;
00205                 else if (textToken == "function")
00206                     newToken = KeywordFunction;
00207                 else if (textToken == "lambda")
00208                     newToken = KeywordLambda;
00209                 else if (textToken == "null")
00210                     newToken = KeywordNull;
00211                 else if (textToken == "true")
00212                     newToken = KeywordTrue;
00213                 else if (textToken == "false")
00214                     newToken = KeywordFalse;
00215                 else
00216                 {
00217                     newToken = TokenName;
00218                     haveValidName = true;
00219                 }
00220             }
00221             break;
00222 
00223         case CommentState:
00224             if (nextChar == '\n' || nextChar == '\r')
00225                 state = StartState;
00226             break;
00227 
00228         case StringState:
00229             if (nextChar == '"')
00230             {
00231                 newToken = TokenString;
00232                 haveValidString = true;
00233                 nextChar = readChar();
00234             }
00235             else if (nextChar == '\\')
00236             {
00237                 state = StringEscapeState;
00238             }
00239             else
00240             {
00241                 state = StringState;
00242                 textToken += (char) nextChar;
00243             }
00244             break;
00245 
00246         case StringEscapeState:
00247             if (nextChar == '\\')
00248             {
00249                 textToken += '\\';
00250             }
00251             else if (nextChar == 'n')
00252             {
00253                 textToken += '\n';
00254             }
00255             else if (nextChar == '"')
00256             {
00257                 textToken += '"';
00258             }
00259             else
00260             {
00261                 newToken = TokenError;
00262                 syntaxError("Unknown escape code in string");
00263             }
00264             state = StringState;
00265             break;
00266 
00267         case MinusState:
00268             newToken = TokenMinus;
00269             state = StartState;
00270             break;
00271 
00272         case PlusState:
00273             newToken = TokenPlus;
00274             state = StartState;
00275             break;
00276 
00277         case AsteriskState:
00278             newToken = TokenMultiply;
00279             state = StartState;
00280             break;
00281 
00282         case SlashState:
00283             newToken = TokenDivide;
00284             state = StartState;
00285             break;
00286 
00287         case EqualState:
00288             if (nextChar == '=')
00289             {
00290                 newToken = TokenEqual;
00291                 nextChar = readChar();
00292                 state = StartState;
00293             }
00294             else
00295             {
00296                 newToken = TokenAssign;
00297                 state = StartState;
00298             }
00299             break;
00300 
00301         case LessState:
00302             if (nextChar == '=')
00303             {
00304                 newToken = TokenLesserEqual;
00305                 nextChar = readChar();
00306                 state = StartState;
00307             }
00308             else
00309             {
00310                 newToken = TokenLesser;
00311                 state = StartState;
00312             }
00313             break;
00314 
00315         case GreaterState:
00316             if (nextChar == '=')
00317             {
00318                 newToken = TokenGreaterEqual;
00319                 nextChar = readChar();
00320                 state = StartState;
00321             }
00322             else
00323             {
00324                 newToken = TokenGreater;
00325                 state = StartState;
00326             }
00327             break;
00328 
00329         case BangState:
00330             if (nextChar == '=')
00331             {
00332                 newToken = TokenNotEqual;
00333                 nextChar = readChar();
00334                 state = StartState;
00335             }
00336             else
00337             {
00338                 newToken = TokenNot;
00339                 state = StartState;
00340             }
00341             break;
00342 
00343         case NumberState:
00344             if (isdigit(nextChar))
00345             {
00346                 state = NumberState;
00347                 integerValue = integerValue * 10 + (int) nextChar - (int) '0';
00348             }
00349             else if (nextChar == '.')
00350             {
00351                 state = FractionState;
00352             }
00353             else if (nextChar == 'e' || nextChar == 'E')
00354             {
00355                 state = ExponentFirstState;
00356             }
00357             else if (issep(nextChar))
00358             {
00359                 newToken = TokenNumber;
00360                 haveValidNumber = true;
00361             }
00362             else
00363             {
00364                 newToken = TokenError;
00365                 syntaxError("Bad character in number");
00366             }
00367             break;
00368 
00369         case FractionState:
00370             if (isdigit(nextChar))
00371             {
00372                 state = FractionState;
00373                 fractionValue = fractionValue * 10 + nextChar - (int) '0';
00374                 fracExp *= 10;
00375             } 
00376             else if (nextChar == 'e' || nextChar == 'E')
00377             {
00378                 state = ExponentFirstState;
00379             }
00380             else if (issep(nextChar))
00381             {
00382                 newToken = TokenNumber;
00383                 haveValidNumber = true;
00384             } else {
00385                 newToken = TokenError;
00386                 syntaxError("Bad character in number");
00387             }
00388             break;
00389 
00390         case ExponentFirstState:
00391             if (isdigit(nextChar))
00392             {
00393                 state = ExponentState;
00394                 exponentValue = (int) nextChar - (int) '0';
00395             }
00396             else if (nextChar == '-')
00397             {
00398                 state = ExponentState;
00399                 exponentSign = -1;
00400             }
00401             else if (nextChar == '+')
00402             {
00403                 state = ExponentState;
00404             }
00405             else
00406             {
00407                 state = ErrorState;
00408                 syntaxError("Bad character in number");
00409             }
00410             break;
00411 
00412         case ExponentState:
00413             if (isdigit(nextChar))
00414             {
00415                 state = ExponentState;
00416                 exponentValue = exponentValue * 10 + (int) nextChar - (int) '0';
00417             }
00418             else if (issep(nextChar))
00419             {
00420                 newToken = TokenNumber;
00421                 haveValidNumber = true;
00422             }
00423             else
00424             {
00425                 state = ErrorState;
00426                 syntaxError("Bad character in number");
00427             }
00428             break;
00429 
00430         case DotState:
00431             if (isdigit(nextChar))
00432             {
00433                 state = FractionState;
00434                 fractionValue = fractionValue * 10 + (int) nextChar - (int) '0';
00435                 fracExp = 10;
00436             }
00437             else
00438             {
00439                 state = ErrorState;
00440                 syntaxError("'.' in stupid place");
00441             }
00442             break;
00443         }
00444 
00445         if (newToken == TokenBegin)
00446         {
00447             nextChar = readChar();
00448         }
00449     }
00450 
00451     tokenType = newToken;
00452     if (haveValidNumber)
00453     {
00454         numberValue = integerValue + fractionValue / fracExp;
00455         if (exponentValue != 0)
00456             numberValue *= pow(10, exponentValue * exponentSign);
00457         numberValue *= sign;
00458     }
00459 
00460     return tokenType;
00461 }
00462 
00463 
00464 Scanner::TokenType Scanner::getTokenType()
00465 {
00466     return tokenType;
00467 }
00468 
00469 
00470 void Scanner::pushBack()
00471 {
00472     pushedBack = true;
00473 }
00474 
00475 
00476 double Scanner::getNumberValue()
00477 {
00478     return numberValue;
00479 }
00480 
00481 
00482 string Scanner::getNameValue()
00483 {
00484     return textToken;
00485 }
00486 
00487 
00488 string Scanner::getStringValue()
00489 {
00490     return textToken;
00491 }
00492 
00493 
00494 int Scanner::readChar()
00495 {
00496     return (int) in->get();
00497 }
00498 
00499 void Scanner::syntaxError(char* message)
00500 {
00501     cerr << message << '\n';
00502 }
00503 
00504 
00505 int Scanner::getLineNumber()
00506 {
00507     return 0;
00508 }
00509 
#if 0
// Scanner test
//
// Disabled driver: tokenizes standard input and prints one line per token
// until the scanner reports the end of input or an error.
int main(int argc, char *argv[])
{
    Scanner scanner(&cin);
    Scanner::TokenType tok = Scanner::TokenBegin;

    // tok starts as TokenBegin, so the body always runs at least once.
    do
    {
        tok = scanner.nextToken();

        switch (tok)
        {
        // Tokens that carry a value.
        case Scanner::TokenName:
            cout << "Name = " << scanner.getNameValue();
            break;
        case Scanner::TokenNumber:
            cout << "Number = " << scanner.getNumberValue();
            break;
        case Scanner::TokenString:
            cout << "String = " << '"' << scanner.getStringValue() << '"';
            break;

        // Stream markers.
        case Scanner::TokenBegin:
            cout << "Begin";
            break;
        case Scanner::TokenEnd:
            cout << "End";
            break;

        // Punctuation and operators.
        case Scanner::TokenOpen:
            cout << '(';
            break;
        case Scanner::TokenClose:
            cout << ')';
            break;
        case Scanner::TokenBeginGroup:
            cout << '{';
            break;
        case Scanner::TokenEndGroup:
            cout << '}';
            break;
        case Scanner::TokenEndStatement:
            cout << ';';
            break;
        case Scanner::TokenAssign:
            cout << '=';
            break;
        case Scanner::TokenEqual:
            cout << "==";
            break;
        case Scanner::TokenPlus:
            cout << '+';
            break;
        case Scanner::TokenMinus:
            cout << '-';
            break;
        case Scanner::TokenMultiply:
            cout << '*';
            break;
        case Scanner::TokenDivide:
            cout << '/';
            break;

        default:
            cout << "Other";
            break;
        }

        cout << '\n';
    }
    while (tok != Scanner::TokenEnd && tok != Scanner::TokenError);

    return 0;
}
#endif

Generated on Sat Jan 14 22:30:32 2006 for Celestia by  doxygen 1.4.1