Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

celx::Scanner Class Reference

#include <scanner.h>

List of all members.

Public Types

enum  TokenType {
  TokenName = 0, TokenString = 1, TokenNumber = 2, TokenBegin = 3,
  TokenEnd = 4, TokenNull = 5, TokenBeginGroup = 6, TokenEndGroup = 7,
  TokenBeginArray = 8, TokenEndArray = 9, TokenEqual = 10, TokenNotEqual = 11,
  TokenBar = 12, TokenOpen = 13, TokenClose = 14, TokenPlus = 15,
  TokenMinus = 16, TokenMultiply = 17, TokenDivide = 18, TokenEndStatement = 19,
  TokenAssign = 20, TokenGreater = 21, TokenLesser = 22, TokenGreaterEqual = 23,
  TokenLesserEqual = 24, TokenNot = 25, TokenComma = 26, KeywordIf = 50,
  KeywordElse = 51, KeywordFor = 52, KeywordWhile = 53, KeywordTrue = 54,
  KeywordFalse = 55, KeywordNull = 56, KeywordVar = 57, KeywordReturn = 58,
  KeywordFunction = 59, KeywordLambda = 60, TokenError = 255
}

Public Member Functions

int getLineNumber ()
std::string getNameValue ()
double getNumberValue ()
std::string getStringValue ()
TokenType getTokenType ()
TokenType nextToken ()
void pushBack ()
 Scanner (std::istream *)

Private Types

enum  State {
  StartState = 0, NameState = 1, NumberState = 2, FractionState = 3,
  ExponentState = 4, ExponentFirstState = 5, DotState = 6, CommentState = 7,
  StringState = 8, ErrorState = 9, StringEscapeState = 10, MinusState = 11,
  PlusState = 12, EqualState = 13, AsteriskState = 14, SlashState = 15,
  LessState = 16, GreaterState = 17, BangState = 18
}

Private Member Functions

int readChar ()
void syntaxError (char *)

Private Attributes

bool haveValidName
bool haveValidNumber
bool haveValidString
std::istream * in
int nextChar
double numberValue
bool pushedBack
std::string textToken
TokenType tokenType


Member Enumeration Documentation

enum celx::Scanner::State [private]
 

Enumeration values:
StartState 
NameState 
NumberState 
FractionState 
ExponentState 
ExponentFirstState 
DotState 
CommentState 
StringState 
ErrorState 
StringEscapeState 
MinusState 
PlusState 
EqualState 
AsteriskState 
SlashState 
LessState 
GreaterState 
BangState 

Definition at line 78 of file scanner.h.

00079     {
00080         StartState          = 0,
00081         NameState           = 1,
00082         NumberState         = 2,
00083         FractionState       = 3,
00084         ExponentState       = 4,
00085         ExponentFirstState  = 5,
00086         DotState            = 6,
00087         CommentState        = 7,
00088         StringState         = 8,
00089         ErrorState          = 9,
00090         StringEscapeState   = 10,
00091         MinusState          = 11,
00092         PlusState           = 12,
00093         EqualState          = 13,
00094         AsteriskState       = 14,
00095         SlashState          = 15,
00096         LessState           = 16,
00097         GreaterState        = 17,
00098         BangState           = 18,
00099     };

enum celx::Scanner::TokenType
 

Enumeration values:
TokenName 
TokenString 
TokenNumber 
TokenBegin 
TokenEnd 
TokenNull 
TokenBeginGroup 
TokenEndGroup 
TokenBeginArray 
TokenEndArray 
TokenEqual 
TokenNotEqual 
TokenBar 
TokenOpen 
TokenClose 
TokenPlus 
TokenMinus 
TokenMultiply 
TokenDivide 
TokenEndStatement 
TokenAssign 
TokenGreater 
TokenLesser 
TokenGreaterEqual 
TokenLesserEqual 
TokenNot 
TokenComma 
KeywordIf 
KeywordElse 
KeywordFor 
KeywordWhile 
KeywordTrue 
KeywordFalse 
KeywordNull 
KeywordVar 
KeywordReturn 
KeywordFunction 
KeywordLambda 
TokenError 

Definition at line 23 of file scanner.h.

00024     {
00025         TokenName           = 0,
00026         TokenString         = 1,
00027         TokenNumber         = 2,
00028         TokenBegin          = 3,
00029         TokenEnd            = 4,
00030         TokenNull           = 5,
00031         TokenBeginGroup     = 6,
00032         TokenEndGroup       = 7,
00033         TokenBeginArray     = 8,
00034         TokenEndArray       = 9,
00035         TokenEqual          = 10,
00036         TokenNotEqual       = 11,
00037         TokenBar            = 12,
00038         TokenOpen           = 13,
00039         TokenClose          = 14,
00040         TokenPlus           = 15,
00041         TokenMinus          = 16,
00042         TokenMultiply       = 17,
00043         TokenDivide         = 18,
00044         TokenEndStatement   = 19,
00045         TokenAssign         = 20,
00046         TokenGreater        = 21,
00047         TokenLesser         = 22,
00048         TokenGreaterEqual   = 23,
00049         TokenLesserEqual    = 24,
00050         TokenNot            = 25,
00051         TokenComma          = 26,
00052         KeywordIf           = 50,
00053         KeywordElse         = 51,
00054         KeywordFor          = 52,
00055         KeywordWhile        = 53,
00056         KeywordTrue         = 54,
00057         KeywordFalse        = 55,
00058         KeywordNull         = 56,
00059         KeywordVar          = 57,
00060         KeywordReturn       = 58,
00061         KeywordFunction     = 59,
00062         KeywordLambda       = 60,
00063         TokenError          = 255,
00064     };


Constructor & Destructor Documentation

Scanner::Scanner (std::istream * _in)
 

Definition at line 25 of file scanner.cpp.

00025                              :
00026     in(_in),
00027     tokenType(TokenBegin),
00028     haveValidNumber(false),
00029     haveValidName(false),
00030     haveValidString(false),
00031     pushedBack(false)
00032 {
00033 }


Member Function Documentation

int Scanner::getLineNumber ()
 

Definition at line 505 of file scanner.cpp.

00506 {
00507     return 0;
00508 }

string Scanner::getNameValue ()
 

Definition at line 482 of file scanner.cpp.

References textToken.

00483 {
00484     return textToken;
00485 }

double Scanner::getNumberValue ()
 

Definition at line 476 of file scanner.cpp.

References numberValue.

00477 {
00478     return numberValue;
00479 }

string Scanner::getStringValue ()
 

Definition at line 488 of file scanner.cpp.

References textToken.

00489 {
00490     return textToken;
00491 }

Scanner::TokenType Scanner::getTokenType ()
 

Definition at line 464 of file scanner.cpp.

References tokenType.

00465 {
00466     return tokenType;
00467 }

Scanner::TokenType Scanner::nextToken ()
 

Definition at line 36 of file scanner.cpp.

References AsteriskState, BangState, CommentState, DotState, EqualState, ErrorState, ExponentFirstState, ExponentState, FractionState, GreaterState, haveValidName, haveValidNumber, haveValidString, in, issep(), KeywordElse, KeywordFalse, KeywordFor, KeywordFunction, KeywordIf, KeywordLambda, KeywordNull, KeywordReturn, KeywordTrue, KeywordVar, KeywordWhile, LessState, MinusState, NameState, nextChar, NumberState, numberValue, PlusState, pow(), pushedBack, readChar(), sign(), SlashState, StartState, StringEscapeState, StringState, syntaxError(), textToken, TokenAssign, TokenBar, TokenBegin, TokenBeginArray, TokenBeginGroup, TokenClose, TokenComma, TokenDivide, TokenEnd, TokenEndArray, TokenEndGroup, TokenEndStatement, TokenEqual, TokenError, TokenGreater, TokenGreaterEqual, TokenLesser, TokenLesserEqual, TokenMinus, TokenMultiply, TokenName, TokenNot, TokenNotEqual, TokenNumber, TokenOpen, TokenPlus, TokenString, and tokenType.

Referenced by main().

00037 {
00038     State state = StartState;
00039 
00040     if (pushedBack)
00041     {
00042         pushedBack = false;
00043         return tokenType;
00044     }
00045 
00046     textToken = "";
00047     haveValidNumber = false;
00048     haveValidName = false;
00049     haveValidString = false;
00050 
00051     if (tokenType == TokenBegin)
00052     {
00053         nextChar = readChar();
00054         if (in->eof())
00055             return TokenEnd;
00056     }
00057     else if (tokenType == TokenEnd)
00058     {
00059         return tokenType;
00060     }
00061 
00062     double integerValue = 0;
00063     double fractionValue = 0;
00064     double sign = 1;
00065     double fracExp = 1;
00066     double exponentValue = 0;
00067     double exponentSign = 1;
00068 
00069     TokenType newToken = TokenBegin;
00070     while (newToken == TokenBegin)
00071     {
00072         switch (state)
00073         {
00074         case StartState:
00075             if (isspace(nextChar))
00076             {
00077                 state = StartState;
00078             }
00079             else if (isdigit(nextChar))
00080             {
00081                 state = NumberState;
00082                 integerValue = (int) nextChar - (int) '0';
00083             }
00084             else if (nextChar == '(')
00085             {
00086                 newToken = TokenOpen;
00087                 nextChar = readChar();                
00088             }
00089             else if (nextChar == ')')
00090             {
00091                 newToken = TokenClose;
00092                 nextChar = readChar();                
00093             }
00094             else if (nextChar == ',')
00095             {
00096                 newToken = TokenComma;
00097                 nextChar = readChar();                
00098             }
00099             else if (nextChar == '+')
00100             {
00101                 state = PlusState;
00102             }
00103             else if (nextChar == '-')
00104             {
00105                 state = MinusState;
00106             }
00107             else if (nextChar == '*')
00108             {
00109                 state = AsteriskState;
00110             }
00111             else if (nextChar == '/')
00112             {
00113                 state = SlashState;
00114             }
00115             else if (isalpha(nextChar))
00116             {
00117                 state = NameState;
00118                 textToken += (char) nextChar;
00119             }
00120             else if (nextChar == '#')
00121             {
00122                 state = CommentState;
00123             }
00124             else if (nextChar == '"')
00125             {
00126                 state = StringState;
00127             }
00128             else if (nextChar == ';')
00129             {
00130                 newToken = TokenEndStatement;
00131                 nextChar = readChar();
00132             }
00133             else if (nextChar == '{')
00134             {
00135                 newToken = TokenBeginGroup;
00136                 nextChar = readChar();
00137             }
00138             else if (nextChar == '}')
00139             {
00140                 newToken = TokenEndGroup;
00141                 nextChar = readChar();
00142             }
00143             else if (nextChar == '[')
00144             {
00145                 newToken = TokenBeginArray;
00146                 nextChar = readChar();
00147             }
00148             else if (nextChar == ']')
00149             {
00150                 newToken = TokenEndArray;
00151                 nextChar = readChar();
00152             }
00153             else if (nextChar == '=')
00154             {
00155                 state = EqualState;
00156             }
00157             else if (nextChar == '<')
00158             {
00159                 state = LessState;
00160             }
00161             else if (nextChar == '>')
00162             {
00163                 state = GreaterState;
00164             }
00165             else if (nextChar == '!')
00166             {
00167                 state = BangState;
00168             }
00169             else if (nextChar == '|')
00170             {
00171                 newToken = TokenBar;
00172                 nextChar = readChar();
00173             }
00174             else if (nextChar == -1)
00175             {
00176                 newToken = TokenEnd;
00177             }
00178             else
00179             {
00180                 newToken = TokenError;
00181                 syntaxError("Bad character in stream");
00182             }
00183             break;
00184 
00185         case NameState:
00186             if (isalpha(nextChar) || isdigit(nextChar))
00187             {
00188                 state = NameState;
00189                 textToken += (char) nextChar;
00190             }
00191             else
00192             {
00193                 if (textToken == "for")
00194                     newToken = KeywordFor;
00195                 else if (textToken == "while")
00196                     newToken = KeywordWhile;
00197                 else if (textToken == "if")
00198                     newToken = KeywordIf;
00199                 else if (textToken == "else")
00200                     newToken = KeywordElse;
00201                 else if (textToken == "var")
00202                     newToken = KeywordVar;
00203                 else if (textToken == "return")
00204                     newToken = KeywordReturn;
00205                 else if (textToken == "function")
00206                     newToken = KeywordFunction;
00207                 else if (textToken == "lambda")
00208                     newToken = KeywordLambda;
00209                 else if (textToken == "null")
00210                     newToken = KeywordNull;
00211                 else if (textToken == "true")
00212                     newToken = KeywordTrue;
00213                 else if (textToken == "false")
00214                     newToken = KeywordFalse;
00215                 else
00216                 {
00217                     newToken = TokenName;
00218                     haveValidName = true;
00219                 }
00220             }
00221             break;
00222 
00223         case CommentState:
00224             if (nextChar == '\n' || nextChar == '\r')
00225                 state = StartState;
00226             break;
00227 
00228         case StringState:
00229             if (nextChar == '"')
00230             {
00231                 newToken = TokenString;
00232                 haveValidString = true;
00233                 nextChar = readChar();
00234             }
00235             else if (nextChar == '\\')
00236             {
00237                 state = StringEscapeState;
00238             }
00239             else
00240             {
00241                 state = StringState;
00242                 textToken += (char) nextChar;
00243             }
00244             break;
00245 
00246         case StringEscapeState:
00247             if (nextChar == '\\')
00248             {
00249                 textToken += '\\';
00250             }
00251             else if (nextChar == 'n')
00252             {
00253                 textToken += '\n';
00254             }
00255             else if (nextChar == '"')
00256             {
00257                 textToken += '"';
00258             }
00259             else
00260             {
00261                 newToken = TokenError;
00262                 syntaxError("Unknown escape code in string");
00263             }
00264             state = StringState;
00265             break;
00266 
00267         case MinusState:
00268             newToken = TokenMinus;
00269             state = StartState;
00270             break;
00271 
00272         case PlusState:
00273             newToken = TokenPlus;
00274             state = StartState;
00275             break;
00276 
00277         case AsteriskState:
00278             newToken = TokenMultiply;
00279             state = StartState;
00280             break;
00281 
00282         case SlashState:
00283             newToken = TokenDivide;
00284             state = StartState;
00285             break;
00286 
00287         case EqualState:
00288             if (nextChar == '=')
00289             {
00290                 newToken = TokenEqual;
00291                 nextChar = readChar();
00292                 state = StartState;
00293             }
00294             else
00295             {
00296                 newToken = TokenAssign;
00297                 state = StartState;
00298             }
00299             break;
00300 
00301         case LessState:
00302             if (nextChar == '=')
00303             {
00304                 newToken = TokenLesserEqual;
00305                 nextChar = readChar();
00306                 state = StartState;
00307             }
00308             else
00309             {
00310                 newToken = TokenLesser;
00311                 state = StartState;
00312             }
00313             break;
00314 
00315         case GreaterState:
00316             if (nextChar == '=')
00317             {
00318                 newToken = TokenGreaterEqual;
00319                 nextChar = readChar();
00320                 state = StartState;
00321             }
00322             else
00323             {
00324                 newToken = TokenGreater;
00325                 state = StartState;
00326             }
00327             break;
00328 
00329         case BangState:
00330             if (nextChar == '=')
00331             {
00332                 newToken = TokenNotEqual;
00333                 nextChar = readChar();
00334                 state = StartState;
00335             }
00336             else
00337             {
00338                 newToken = TokenNot;
00339                 state = StartState;
00340             }
00341             break;
00342 
00343         case NumberState:
00344             if (isdigit(nextChar))
00345             {
00346                 state = NumberState;
00347                 integerValue = integerValue * 10 + (int) nextChar - (int) '0';
00348             }
00349             else if (nextChar == '.')
00350             {
00351                 state = FractionState;
00352             }
00353             else if (nextChar == 'e' || nextChar == 'E')
00354             {
00355                 state = ExponentFirstState;
00356             }
00357             else if (issep(nextChar))
00358             {
00359                 newToken = TokenNumber;
00360                 haveValidNumber = true;
00361             }
00362             else
00363             {
00364                 newToken = TokenError;
00365                 syntaxError("Bad character in number");
00366             }
00367             break;
00368 
00369         case FractionState:
00370             if (isdigit(nextChar))
00371             {
00372                 state = FractionState;
00373                 fractionValue = fractionValue * 10 + nextChar - (int) '0';
00374                 fracExp *= 10;
00375             } 
00376             else if (nextChar == 'e' || nextChar == 'E')
00377             {
00378                 state = ExponentFirstState;
00379             }
00380             else if (issep(nextChar))
00381             {
00382                 newToken = TokenNumber;
00383                 haveValidNumber = true;
00384             } else {
00385                 newToken = TokenError;
00386                 syntaxError("Bad character in number");
00387             }
00388             break;
00389 
00390         case ExponentFirstState:
00391             if (isdigit(nextChar))
00392             {
00393                 state = ExponentState;
00394                 exponentValue = (int) nextChar - (int) '0';
00395             }
00396             else if (nextChar == '-')
00397             {
00398                 state = ExponentState;
00399                 exponentSign = -1;
00400             }
00401             else if (nextChar == '+')
00402             {
00403                 state = ExponentState;
00404             }
00405             else
00406             {
00407                 state = ErrorState;
00408                 syntaxError("Bad character in number");
00409             }
00410             break;
00411 
00412         case ExponentState:
00413             if (isdigit(nextChar))
00414             {
00415                 state = ExponentState;
00416                 exponentValue = exponentValue * 10 + (int) nextChar - (int) '0';
00417             }
00418             else if (issep(nextChar))
00419             {
00420                 newToken = TokenNumber;
00421                 haveValidNumber = true;
00422             }
00423             else
00424             {
00425                 state = ErrorState;
00426                 syntaxError("Bad character in number");
00427             }
00428             break;
00429 
00430         case DotState:
00431             if (isdigit(nextChar))
00432             {
00433                 state = FractionState;
00434                 fractionValue = fractionValue * 10 + (int) nextChar - (int) '0';
00435                 fracExp = 10;
00436             }
00437             else
00438             {
00439                 state = ErrorState;
00440                 syntaxError("'.' in stupid place");
00441             }
00442             break;
00443         }
00444 
00445         if (newToken == TokenBegin)
00446         {
00447             nextChar = readChar();
00448         }
00449     }
00450 
00451     tokenType = newToken;
00452     if (haveValidNumber)
00453     {
00454         numberValue = integerValue + fractionValue / fracExp;
00455         if (exponentValue != 0)
00456             numberValue *= pow(10, exponentValue * exponentSign);
00457         numberValue *= sign;
00458     }
00459 
00460     return tokenType;
00461 }

void Scanner::pushBack ()
 

Definition at line 470 of file scanner.cpp.

References pushedBack.

Referenced by main().

00471 {
00472     pushedBack = true;
00473 }

int Scanner::readChar ()  [private]
 

Definition at line 494 of file scanner.cpp.

References in.

Referenced by nextToken().

00495 {
00496     return (int) in->get();
00497 }

void Scanner::syntaxError (char * message)  [private]
 

Definition at line 499 of file scanner.cpp.

Referenced by nextToken().

00500 {
00501     cerr << message << '\n';
00502 }


Member Data Documentation

bool celx::Scanner::haveValidName [private]
 

Definition at line 106 of file scanner.h.

Referenced by nextToken().

bool celx::Scanner::haveValidNumber [private]
 

Definition at line 105 of file scanner.h.

Referenced by nextToken().

bool celx::Scanner::haveValidString [private]
 

Definition at line 107 of file scanner.h.

Referenced by nextToken().

std::istream* celx::Scanner::in [private]
 

Definition at line 101 of file scanner.h.

Referenced by nextToken(), and readChar().

int celx::Scanner::nextChar [private]
 

Definition at line 103 of file scanner.h.

Referenced by nextToken().

double celx::Scanner::numberValue [private]
 

Definition at line 114 of file scanner.h.

Referenced by getNumberValue(), and nextToken().

bool celx::Scanner::pushedBack [private]
 

Definition at line 109 of file scanner.h.

Referenced by nextToken(), and pushBack().

std::string celx::Scanner::textToken [private]
 

Definition at line 116 of file scanner.h.

Referenced by getNameValue(), getStringValue(), and nextToken().

TokenType celx::Scanner::tokenType [private]
 

Definition at line 104 of file scanner.h.

Referenced by getTokenType(), and nextToken().


The documentation for this class was generated from the following files:
scanner.h
scanner.cpp
Generated on Sat Jan 14 22:33:45 2006 for Celestia by  doxygen 1.4.1