Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

Tokenizer Class Reference

#include <tokenizer.h>

List of all members.

Public Types

enum  TokenType {
  TokenName = 0, TokenString = 1, TokenNumber = 2, TokenBegin = 3,
  TokenEnd = 4, TokenNull = 5, TokenBeginGroup = 6, TokenEndGroup = 7,
  TokenBeginArray = 8, TokenEndArray = 9, TokenEquals = 10, TokenError = 11,
  TokenBar = 12
}

Public Member Functions

int getLineNumber () const
string getNameValue ()
double getNumberValue ()
string getStringValue ()
TokenType getTokenType ()
TokenType nextToken ()
void pushBack ()
 Tokenizer (istream *)

Private Types

enum  State {
  StartState = 0, NameState = 1, NumberState = 2, FractionState = 3,
  ExponentState = 4, ExponentFirstState = 5, DotState = 6, CommentState = 7,
  StringState = 8, ErrorState = 9, StringEscapeState = 10, UnicodeEscapeState = 11
}

Private Member Functions

int readChar ()
void syntaxError (const char *)

Private Attributes

bool haveValidName
bool haveValidNumber
bool haveValidString
istream * in
int lineNum
int nextChar
double numberValue
bool pushedBack
string textToken
TokenType tokenType
unsigned int unicodeEscapeDigits
unsigned int unicodeValue


Member Enumeration Documentation

enum Tokenizer::State [private]
 

Enumeration values:
StartState 
NameState 
NumberState 
FractionState 
ExponentState 
ExponentFirstState 
DotState 
CommentState 
StringState 
ErrorState 
StringEscapeState 
UnicodeEscapeState 

Definition at line 51 of file tokenizer.h.

00052     {
00053         StartState          = 0,
00054         NameState           = 1,
00055         NumberState         = 2,
00056         FractionState       = 3,
00057         ExponentState       = 4,
00058         ExponentFirstState  = 5,
00059         DotState            = 6,
00060         CommentState        = 7,
00061         StringState         = 8,
00062         ErrorState          = 9,
00063         StringEscapeState   = 10,
00064         UnicodeEscapeState  = 11,
00065     };

enum Tokenizer::TokenType
 

Enumeration values:
TokenName 
TokenString 
TokenNumber 
TokenBegin 
TokenEnd 
TokenNull 
TokenBeginGroup 
TokenEndGroup 
TokenBeginArray 
TokenEndArray 
TokenEquals 
TokenError 
TokenBar 

Definition at line 22 of file tokenizer.h.

00023     {
00024         TokenName           = 0,
00025         TokenString         = 1,
00026         TokenNumber         = 2,
00027         TokenBegin          = 3,
00028         TokenEnd            = 4,
00029         TokenNull           = 5,
00030         TokenBeginGroup     = 6,
00031         TokenEndGroup       = 7,
00032         TokenBeginArray     = 8,
00033         TokenEndArray       = 9,
00034         TokenEquals         = 10,
00035         TokenError          = 11,
00036         TokenBar            = 12,
00037     };


Constructor & Destructor Documentation

Tokenizer::Tokenizer(istream * _in)
 

Definition at line 23 of file tokenizer.cpp.

00023                                  :
00024     in(_in),
00025     tokenType(TokenBegin),
00026     haveValidNumber(false),
00027     haveValidName(false),
00028     haveValidString(false),
00029     pushedBack(false),
00030     lineNum(1)
00031 {
00032 }


Member Function Documentation

int Tokenizer::getLineNumber() const
 

Definition at line 402 of file tokenizer.cpp.

References lineNum.

Referenced by ReadCelestiaConfig(), and AsciiModelLoader::reportError().

00403 {
00404     return lineNum;
00405 }

string Tokenizer::getNameValue()
 

Definition at line 375 of file tokenizer.cpp.

References textToken.

Referenced by StarDatabase::load(), AsciiModelLoader::load(), DSODatabase::load(), AsciiModelLoader::loadMaterial(), AsciiModelLoader::loadMesh(), LoadSolarSystemObjects(), AsciiModelLoader::loadVertexDescription(), AsciiModelLoader::loadVertices(), LoadVirtualTexture(), parseLabelFlags(), parseOrbitFlags(), parseRenderFlags(), Parser::readHash(), and Parser::readValue().

00376 {
00377     return textToken;
00378 }

double Tokenizer::getNumberValue()
 

Definition at line 369 of file tokenizer.cpp.

References numberValue.

Referenced by StarDatabase::load(), DSODatabase::load(), AsciiModelLoader::loadMaterial(), AsciiModelLoader::loadMesh(), AsciiModelLoader::loadVertices(), and Parser::readValue().

00370 {
00371     return numberValue;
00372 }

string Tokenizer::getStringValue()
 

Definition at line 381 of file tokenizer.cpp.

References textToken.

Referenced by StarDatabase::load(), DSODatabase::load(), LoadCelestiaMesh(), AsciiModelLoader::loadMaterial(), LoadSolarSystemObjects(), CommandParser::parseCommand(), ReadAsterismList(), ReadCelestiaConfig(), ReadFavoritesList(), and Parser::readValue().

00382 {
00383     return textToken;
00384 }

Tokenizer::TokenType Tokenizer::getTokenType()
 

Definition at line 357 of file tokenizer.cpp.

References tokenType.

Referenced by StarDatabase::load(), DSODatabase::load(), AsciiModelLoader::loadMaterial(), LoadSolarSystemObjects(), AsciiModelLoader::loadVertexDescription(), ReadAsterismList(), ReadDestinationList(), and ReadFavoritesList().

00358 {
00359     return tokenType;
00360 }

Tokenizer::TokenType Tokenizer::nextToken()
 

Definition at line 35 of file tokenizer.cpp.

References CommentState, DotState, ErrorState, ExponentFirstState, ExponentState, FractionState, haveValidName, haveValidNumber, haveValidString, in, issep(), NameState, nextChar, NumberState, numberValue, pow(), pushedBack, readChar(), sign(), StartState, StringEscapeState, StringState, syntaxError(), textToken, TokenBar, TokenBegin, TokenBeginArray, TokenBeginGroup, TokenEnd, TokenEndArray, TokenEndGroup, TokenEquals, TokenError, TokenName, TokenNumber, TokenString, tokenType, unicodeEscapeDigits, UnicodeEscapeState, unicodeValue, and UTF8Encode().

Referenced by StarDatabase::load(), AsciiModelLoader::load(), DSODatabase::load(), LoadCelestiaMesh(), AsciiModelLoader::loadMaterial(), AsciiModelLoader::loadMesh(), LoadSolarSystemObjects(), AsciiModelLoader::loadVertexDescription(), AsciiModelLoader::loadVertices(), LoadVirtualTexture(), CommandParser::parse(), CommandParser::parseCommand(), parseLabelFlags(), parseOrbitFlags(), parseRenderFlags(), Parser::readArray(), ReadAsterismList(), ReadCelestiaConfig(), ReadDestinationList(), ReadFavoritesList(), Parser::readHash(), and Parser::readValue().

00036 {
00037     State state = StartState;
00038 
00039     if (pushedBack)
00040     {
00041         pushedBack = false;
00042         return tokenType;
00043     }
00044 
00045     textToken = "";
00046     haveValidNumber = false;
00047     haveValidName = false;
00048     haveValidString = false;
00049 
00050     if (tokenType == TokenBegin)
00051     {
00052         nextChar = readChar();
00053         if (in->eof())
00054             return TokenEnd;
00055     }
00056     else if (tokenType == TokenEnd)
00057     {
00058         return tokenType;
00059     }
00060 
00061     double integerValue = 0;
00062     double fractionValue = 0;
00063     double sign = 1;
00064     double fracExp = 1;
00065     double exponentValue = 0;
00066     double exponentSign = 1;
00067 
00068     TokenType newToken = TokenBegin;
00069     while (newToken == TokenBegin)
00070     {
00071         switch (state)
00072         {
00073         case StartState:
00074             if (isspace(nextChar))
00075             {
00076                 state = StartState;
00077             }
00078             else if (isdigit(nextChar))
00079             {
00080                 state = NumberState;
00081                 integerValue = (int) nextChar - (int) '0';
00082             }
00083             else if (nextChar == '-')
00084             {
00085                 state = NumberState;
00086                 sign = -1;
00087                 integerValue = 0;
00088             }
00089             else if (isalpha(nextChar) || nextChar == '_')
00090             {
00091                 state = NameState;
00092                 textToken += (char) nextChar;
00093             }
00094             else if (nextChar == '#')
00095             {
00096                 state = CommentState;
00097             }
00098             else if (nextChar == '"')
00099             {
00100                 state = StringState;
00101             }
00102             else if (nextChar == '{')
00103             {
00104                 newToken = TokenBeginGroup;
00105                 nextChar = readChar();
00106             }
00107             else if (nextChar == '}')
00108             {
00109                 newToken = TokenEndGroup;
00110                 nextChar = readChar();
00111             }
00112             else if (nextChar == '[')
00113             {
00114                 newToken = TokenBeginArray;
00115                 nextChar = readChar();
00116             }
00117             else if (nextChar == ']')
00118             {
00119                 newToken = TokenEndArray;
00120                 nextChar = readChar();
00121             }
00122             else if (nextChar == '=')
00123             {
00124                 newToken = TokenEquals;
00125                 nextChar = readChar();
00126             }
00127             else if (nextChar == '|')
00128             {
00129                 newToken = TokenBar;
00130                 nextChar = readChar();
00131             }
00132             else if (nextChar == -1)
00133             {
00134                 newToken = TokenEnd;
00135             }
00136             else
00137             {
00138                 newToken = TokenError;
00139                 syntaxError("Bad character in stream");
00140             }
00141             break;
00142 
00143         case NameState:
00144             if (isalpha(nextChar) || isdigit(nextChar) || nextChar == '_')
00145             {
00146                 state = NameState;
00147                 textToken += (char) nextChar;
00148             }
00149             else
00150             {
00151                 newToken = TokenName;
00152                 haveValidName = true;
00153             }
00154             break;
00155 
00156         case CommentState:
00157             if (nextChar == '\n' || nextChar == '\r')
00158                 state = StartState;
00159             break;
00160 
00161         case StringState:
00162             if (nextChar == '"')
00163             {
00164                 newToken = TokenString;
00165                 haveValidString = true;
00166                 nextChar = readChar();
00167             }
00168             else if (nextChar == '\\')
00169             {
00170                 state = StringEscapeState;
00171             }
00172             else
00173             {
00174                 state = StringState;
00175                 textToken += (char) nextChar;
00176             }
00177             break;
00178 
00179         case StringEscapeState:
00180             if (nextChar == '\\')
00181             {
00182                 textToken += '\\';
00183                 state = StringState;
00184             }
00185             else if (nextChar == 'n')
00186             {
00187                 textToken += '\n';
00188                 state = StringState;
00189             }
00190             else if (nextChar == '"')
00191             {
00192                 textToken += '"';
00193                 state = StringState;
00194             }
00195             else if (nextChar == 'u')
00196             {
00197                 unicodeValue = 0;
00198                 unicodeEscapeDigits = 0;
00199                 state = UnicodeEscapeState;
00200             }
00201             else
00202             {
00203                 newToken = TokenError;
00204                 syntaxError("Unknown escape code in string");
00205                 state = StringState;
00206             }
00207             break;
00208 
00209         case NumberState:
00210             if (isdigit(nextChar))
00211             {
00212                 state = NumberState;
00213                 integerValue = integerValue * 10 + (int) nextChar - (int) '0';
00214             }
00215             else if (nextChar == '.')
00216             {
00217                 state = FractionState;
00218             }
00219             else if (nextChar == 'e' || nextChar == 'E')
00220             {
00221                 state = ExponentFirstState;
00222             }
00223             else if (issep(nextChar))
00224             {
00225                 newToken = TokenNumber;
00226                 haveValidNumber = true;
00227             }
00228             else
00229             {
00230                 newToken = TokenError;
00231                 syntaxError("Bad character in number");
00232             }
00233             break;
00234 
00235         case FractionState:
00236             if (isdigit(nextChar))
00237             {
00238                 state = FractionState;
00239                 fractionValue = fractionValue * 10 + nextChar - (int) '0';
00240                 fracExp *= 10;
00241             } 
00242             else if (nextChar == 'e' || nextChar == 'E')
00243             {
00244                 state = ExponentFirstState;
00245             }
00246             else if (issep(nextChar))
00247             {
00248                 newToken = TokenNumber;
00249                 haveValidNumber = true;
00250             } else {
00251                 newToken = TokenError;
00252                 syntaxError("Bad character in number");
00253             }
00254             break;
00255 
00256         case ExponentFirstState:
00257             if (isdigit(nextChar))
00258             {
00259                 state = ExponentState;
00260                 exponentValue = (int) nextChar - (int) '0';
00261             }
00262             else if (nextChar == '-')
00263             {
00264                 state = ExponentState;
00265                 exponentSign = -1;
00266             }
00267             else if (nextChar == '+')
00268             {
00269                 state = ExponentState;
00270             }
00271             else
00272             {
00273                 state = ErrorState;
00274                 syntaxError("Bad character in number");
00275             }
00276             break;
00277 
00278         case ExponentState:
00279             if (isdigit(nextChar))
00280             {
00281                 state = ExponentState;
00282                 exponentValue = exponentValue * 10 + (int) nextChar - (int) '0';
00283             }
00284             else if (issep(nextChar))
00285             {
00286                 newToken = TokenNumber;
00287                 haveValidNumber = true;
00288             }
00289             else
00290             {
00291                 state = ErrorState;
00292                 syntaxError("Bad character in number");
00293             }
00294             break;
00295 
00296         case DotState:
00297             if (isdigit(nextChar))
00298             {
00299                 state = FractionState;
00300                 fractionValue = fractionValue * 10 + (int) nextChar - (int) '0';
00301                 fracExp = 10;
00302             }
00303             else
00304             {
00305                 state = ErrorState;
00306                 syntaxError("'.' in stupid place");
00307             }
00308             break;
00309 
00310         case UnicodeEscapeState:
00311             if (isxdigit(nextChar))
00312             {
00313                 unsigned int digitValue;
00314                 if (nextChar >= 'a' && nextChar <= 'f')
00315                     digitValue = nextChar - 'a' + 10;
00316                 else if (nextChar >= 'A' && nextChar <= 'F')
00317                     digitValue = nextChar - 'A' + 10;
00318                 else
00319                     digitValue = nextChar - '0';
00320                 unicodeValue = (unicodeValue << 4) + digitValue;
00321                 unicodeEscapeDigits++;
00322                 if (unicodeEscapeDigits == 4)
00323                 {
00324                     char utf8Encoded[7];
00325                     UTF8Encode((wchar_t) unicodeValue, utf8Encoded);
00326                     textToken += utf8Encoded;
00327                     state = StringState;
00328                 }
00329             }
00330             else
00331             {
00332                 state = ErrorState;
00333                 syntaxError("Bad Unicode escape in string");
00334             }
00335             break;
00336         }
00337 
00338         if (newToken == TokenBegin)
00339         {
00340             nextChar = readChar();
00341         }
00342     }
00343 
00344     tokenType = newToken;
00345     if (haveValidNumber)
00346     {
00347         numberValue = integerValue + fractionValue / fracExp;
00348         if (exponentValue != 0)
00349             numberValue *= pow(10.0, exponentValue * exponentSign);
00350         numberValue *= sign;
00351     }
00352 
00353     return tokenType;
00354 }

void Tokenizer::pushBack()
 

Definition at line 363 of file tokenizer.cpp.

References pushedBack.

Referenced by StarDatabase::load(), AsciiModelLoader::load(), CommandParser::parse(), Parser::readArray(), ReadDestinationList(), Parser::readHash(), and Parser::readValue().

00364 {
00365     pushedBack = true;
00366 }

int Tokenizer::readChar() [private]
 

Definition at line 387 of file tokenizer.cpp.

References in, and lineNum.

Referenced by nextToken().

00388 {
00389     int c = (int) in->get();
00390     if (c == '\n')
00391         lineNum++;
00392 
00393     return c;
00394 }

void Tokenizer::syntaxError(const char * message) [private]
 

Definition at line 396 of file tokenizer.cpp.

Referenced by nextToken().

00397 {
00398     cerr << message << '\n';
00399 }


Member Data Documentation

bool Tokenizer::haveValidName [private]
 

Definition at line 72 of file tokenizer.h.

Referenced by nextToken().

bool Tokenizer::haveValidNumber [private]
 

Definition at line 71 of file tokenizer.h.

Referenced by nextToken().

bool Tokenizer::haveValidString [private]
 

Definition at line 73 of file tokenizer.h.

Referenced by nextToken().

istream* Tokenizer::in [private]
 

Definition at line 67 of file tokenizer.h.

Referenced by nextToken(), and readChar().

int Tokenizer::lineNum [private]
 

Definition at line 87 of file tokenizer.h.

Referenced by getLineNumber(), and readChar().

int Tokenizer::nextChar [private]
 

Definition at line 69 of file tokenizer.h.

Referenced by nextToken().

double Tokenizer::numberValue [private]
 

Definition at line 83 of file tokenizer.h.

Referenced by getNumberValue(), and nextToken().

bool Tokenizer::pushedBack [private]
 

Definition at line 78 of file tokenizer.h.

Referenced by nextToken(), and pushBack().

string Tokenizer::textToken [private]
 

Definition at line 85 of file tokenizer.h.

Referenced by getNameValue(), getStringValue(), and nextToken().

TokenType Tokenizer::tokenType [private]
 

Definition at line 70 of file tokenizer.h.

Referenced by getTokenType(), and nextToken().

unsigned int Tokenizer::unicodeEscapeDigits [private]
 

Definition at line 76 of file tokenizer.h.

Referenced by nextToken().

unsigned int Tokenizer::unicodeValue [private]
 

Definition at line 75 of file tokenizer.h.

Referenced by nextToken().


The documentation for this class was generated from the following files:
tokenizer.h
tokenizer.cpp
Generated on Sat Jan 14 22:33:39 2006 for Celestia by  doxygen 1.4.1