mirror of https://github.com/x64dbg/btparser
				
				
				
			refactor (moved Lexer to separate file)
This commit is contained in:
		
							parent
							
								
									8365d3444b
								
							
						
					
					
						commit
						547b65b284
					
				|  | @ -20,6 +20,7 @@ | |||
|   </ItemGroup> | ||||
|   <ItemGroup> | ||||
|     <ClCompile Include="filehelper.cpp" /> | ||||
|     <ClCompile Include="lexer.cpp" /> | ||||
|     <ClCompile Include="main.cpp" /> | ||||
|     <ClCompile Include="stringutils.cpp" /> | ||||
|   </ItemGroup> | ||||
|  | @ -28,6 +29,7 @@ | |||
|     <ClInclude Include="filehelper.h" /> | ||||
|     <ClInclude Include="handle.h" /> | ||||
|     <ClInclude Include="keywords.h" /> | ||||
|     <ClInclude Include="lexer.h" /> | ||||
|     <ClInclude Include="operators.h" /> | ||||
|     <ClInclude Include="stringutils.h" /> | ||||
|     <ClInclude Include="testfiles.h" /> | ||||
|  |  | |||
|  | @ -24,6 +24,9 @@ | |||
|     <ClCompile Include="stringutils.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="lexer.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|   </ItemGroup> | ||||
|   <ItemGroup> | ||||
|     <ClInclude Include="filehelper.h"> | ||||
|  | @ -47,5 +50,8 @@ | |||
|     <ClInclude Include="operators.h"> | ||||
|       <Filter>Header Files</Filter> | ||||
|     </ClInclude> | ||||
|     <ClInclude Include="lexer.h"> | ||||
|       <Filter>Header Files</Filter> | ||||
|     </ClInclude> | ||||
|   </ItemGroup> | ||||
| </Project> | ||||
|  | @ -0,0 +1,499 @@ | |||
| #include "lexer.h" | ||||
| #include "stringutils.h" | ||||
| #include <cctype> | ||||
| #include "filehelper.h" | ||||
| 
 | ||||
//Pack up to three operator characters into one int key (first character in the low byte).
//Every parameter is parenthesized so that arguments containing lower-precedence operators
//(e.g. `a | b` or `c ? x : y`) are shifted as a whole instead of being split by the expansion.
#define MAKE_OP_TRIPLE(ch1, ch2, ch3) ((ch3) << 16 | (ch2) << 8 | (ch1))
#define MAKE_OP_DOUBLE(ch1, ch2) ((ch2) << 8 | (ch1))
#define MAKE_OP_SINGLE(ch1) (ch1)
//Default capacity reserved for the lexer's string buffers.
#define DEFAULT_STRING_BUFFER 65536
| 
 | ||||
| static void clearReserve(std::string & str, size_t reserve = DEFAULT_STRING_BUFFER) | ||||
| { | ||||
|     str.clear(); | ||||
|     str.reserve(reserve); | ||||
| } | ||||
| 
 | ||||
//Appends a single character (including '\0') to the end of 'str'.
static void appendCh(std::string & str, char ch)
{
    str.push_back(ch);
}
| 
 | ||||
//Converts the digit string 'str' (in base 'radix') to an unsigned 64-bit value.
//Returns nullptr on success, otherwise a static message describing the failure:
//  "not a number"                - null/empty input, or input that does not start with a digit/letter
//  "does not fit"                - the value overflows unsigned long long
//  "str not completely consumed" - trailing characters follow the number
//'result' is always written (0 when the input is rejected up front).
static const char* convertNumber(const char* str, uint64_t & result, int radix)
{
    //strtoull() skips leading whitespace and accepts a sign; "-1" would silently wrap
    //around to ULLONG_MAX without setting errno, so such input is rejected here.
    if (!str || !isalnum(static_cast<unsigned char>(*str)))
    {
        result = 0;
        return "not a number";
    }
    errno = 0;
    char* end = nullptr;
    result = strtoull(str, &end, radix);
    if (end == str) //no character was converted
        return "not a number";
    if (result == ULLONG_MAX && errno)
        return "does not fit";
    if (*end)
        return "str not completely consumed";
    return nullptr;
}
| 
 | ||||
| Lexer::Lexer() | ||||
| { | ||||
|     setupTokenMaps(); | ||||
| } | ||||
| 
 | ||||
| bool Lexer::ReadInputFile(const std::string & filename) | ||||
| { | ||||
|     resetLexerState(); | ||||
|     return FileHelper::ReadAllData(filename, mInput); | ||||
| } | ||||
| 
 | ||||
| bool Lexer::DoLexing(std::vector<TokenState> & tokens, std::string & error) | ||||
| { | ||||
|     while (true) | ||||
|     { | ||||
|         auto token = getToken(); | ||||
|         mState.Token = token; | ||||
|         if (token == tok_eof) | ||||
|             break; | ||||
|         if (token == tok_error) | ||||
|         { | ||||
|             error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mError.c_str()); | ||||
|             return false; | ||||
|         } | ||||
|         tokens.push_back(mState); | ||||
|     } | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| bool Lexer::Test(const std::function<void(const std::string & line)> & lexEnum, bool output) | ||||
| { | ||||
|     size_t line = 0; | ||||
|     if (output) | ||||
|         lexEnum("1: "); | ||||
|     Token tok; | ||||
|     std::string toks; | ||||
|     clearReserve(toks); | ||||
|     char newlineText[128] = ""; | ||||
|     do | ||||
|     { | ||||
|         tok = getToken(); | ||||
|         if (!output) | ||||
|             continue; | ||||
|         toks.clear(); | ||||
|         while (line < mState.CurLine) | ||||
|         { | ||||
|             line++; | ||||
|             sprintf_s(newlineText, "\n%d: ", line + 1); | ||||
|             toks.append(newlineText); | ||||
|         } | ||||
|         toks.append(tokString(tok)); | ||||
|         appendCh(toks, ' '); | ||||
|         lexEnum(toks); | ||||
|     } while (tok != tok_eof && tok != tok_error); | ||||
|     if (tok != tok_error && tok != tok_eof) | ||||
|         tok = reportError("lexer did not finish at the end of the file"); | ||||
|     for (const auto& warning : mWarnings) | ||||
|         if (output) | ||||
|             lexEnum("\nwarning: " + warning); | ||||
|     return tok != tok_error; | ||||
| } | ||||
| 
 | ||||
| Lexer::Token Lexer::getToken() | ||||
| { | ||||
|     //skip whitespace
 | ||||
|     while (isspace(mLastChar)) | ||||
|     { | ||||
|         if (mLastChar == '\n') | ||||
|             signalNewLine(); | ||||
|         nextChar(); | ||||
|     } | ||||
| 
 | ||||
|     //skip \\[\r\n]
 | ||||
|     if (mLastChar == '\\' && (peekChar() == '\r' || peekChar() == '\n')) | ||||
|     { | ||||
|         nextChar(); | ||||
|         return getToken(); | ||||
|     } | ||||
| 
 | ||||
|     //character literal
 | ||||
|     if (mLastChar == '\'') | ||||
|     { | ||||
|         std::string charLit; | ||||
|         while (true) | ||||
|         { | ||||
|             nextChar(); | ||||
|             if (mLastChar == EOF) //end of file
 | ||||
|                 return reportError("unexpected end of file in character literal (1)"); | ||||
|             if (mLastChar == '\r' || mLastChar == '\n') | ||||
|                 return reportError("unexpected newline in character literal (1)"); | ||||
|             if (mLastChar == '\'') //end of character literal
 | ||||
|             { | ||||
|                 if (charLit.length() != 1) | ||||
|                     return reportError(StringUtils::sprintf("invalid character literal '%s'", charLit.c_str())); | ||||
|                 mState.CharLit = charLit[0]; | ||||
|                 nextChar(); | ||||
|                 return tok_charlit; | ||||
|             } | ||||
|             if (mLastChar == '\\') //escape sequence
 | ||||
|             { | ||||
|                 nextChar(); | ||||
|                 if (mLastChar == EOF) | ||||
|                     return reportError("unexpected end of file in character literal (2)"); | ||||
|                 if (mLastChar == '\r' || mLastChar == '\n') | ||||
|                     return reportError("unexpected newline in character literal (2)"); | ||||
|                 if (mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\') | ||||
|                     mLastChar = mLastChar; | ||||
|                 else if (mLastChar == 'a') | ||||
|                     mLastChar = '\a'; | ||||
|                 else if (mLastChar == 'b') | ||||
|                     mLastChar = '\b'; | ||||
|                 else if (mLastChar == 'f') | ||||
|                     mLastChar = '\f'; | ||||
|                 else if (mLastChar == 'n') | ||||
|                     mLastChar = '\n'; | ||||
|                 else if (mLastChar == 'r') | ||||
|                     mLastChar = '\r'; | ||||
|                 else if (mLastChar == 't') | ||||
|                     mLastChar = '\t'; | ||||
|                 else if (mLastChar == 'v') | ||||
|                     mLastChar = '\v'; | ||||
|                 else if (mLastChar == '0') | ||||
|                     mLastChar = '\0'; | ||||
|                 else if (mLastChar == 'x') //\xHH
 | ||||
|                 { | ||||
|                     auto ch1 = nextChar(); | ||||
|                     auto ch2 = nextChar(); | ||||
|                     if (isxdigit(ch1) && isxdigit(ch2)) | ||||
|                     { | ||||
|                         char byteStr[3] = ""; | ||||
|                         byteStr[0] = ch1; | ||||
|                         byteStr[1] = ch2; | ||||
|                         uint64_t hexData; | ||||
|                         auto error = convertNumber(byteStr, hexData, 16); | ||||
|                         if (error) | ||||
|                             return reportError(StringUtils::sprintf("convertNumber failed (%s) for hex sequence \"\\x%c%c\" in character literal", error, ch1, ch2)); | ||||
|                         mLastChar = hexData & 0xFF; | ||||
|                     } | ||||
|                     else | ||||
|                         return reportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in character literal", ch1, ch2)); | ||||
|                 } | ||||
|                 else | ||||
|                     return reportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in character literal", mLastChar)); | ||||
|             } | ||||
|             charLit += mLastChar; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     //string literal
 | ||||
|     if (mLastChar == '\"') | ||||
|     { | ||||
|         mState.StringLit.clear(); | ||||
|         while (true) | ||||
|         { | ||||
|             nextChar(); | ||||
|             if (mLastChar == EOF) //end of file
 | ||||
|                 return reportError("unexpected end of file in string literal (1)"); | ||||
|             if (mLastChar == '\r' || mLastChar == '\n') | ||||
|                 return reportError("unexpected newline in string literal (1)"); | ||||
|             if (mLastChar == '\"') //end of string literal
 | ||||
|             { | ||||
|                 nextChar(); | ||||
|                 return tok_stringlit; | ||||
|             } | ||||
|             if (mLastChar == '\\') //escape sequence
 | ||||
|             { | ||||
|                 nextChar(); | ||||
|                 if (mLastChar == EOF) | ||||
|                     return reportError("unexpected end of file in string literal (2)"); | ||||
|                 if (mLastChar == '\r' || mLastChar == '\n') | ||||
|                     return reportError("unexpected newline in string literal (2)"); | ||||
|                 if (mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\') | ||||
|                     mLastChar = mLastChar; | ||||
|                 else if (mLastChar == 'a') | ||||
|                     mLastChar = '\a'; | ||||
|                 else if (mLastChar == 'b') | ||||
|                     mLastChar = '\b'; | ||||
|                 else if (mLastChar == 'f') | ||||
|                     mLastChar = '\f'; | ||||
|                 else if (mLastChar == 'n') | ||||
|                     mLastChar = '\n'; | ||||
|                 else if (mLastChar == 'r') | ||||
|                     mLastChar = '\r'; | ||||
|                 else if (mLastChar == 't') | ||||
|                     mLastChar = '\t'; | ||||
|                 else if (mLastChar == 'v') | ||||
|                     mLastChar = '\v'; | ||||
|                 else if (mLastChar == '0') | ||||
|                     mLastChar = '\0'; | ||||
|                 else if (mLastChar == 'x') //\xHH
 | ||||
|                 { | ||||
|                     auto ch1 = nextChar(); | ||||
|                     auto ch2 = nextChar(); | ||||
|                     if (isxdigit(ch1) && isxdigit(ch2)) | ||||
|                     { | ||||
|                         char byteStr[3] = ""; | ||||
|                         byteStr[0] = ch1; | ||||
|                         byteStr[1] = ch2; | ||||
|                         uint64_t hexData; | ||||
|                         auto error = convertNumber(byteStr, hexData, 16); | ||||
|                         if (error) | ||||
|                             return reportError(StringUtils::sprintf("convertNumber failed (%s) for hex sequence \"\\x%c%c\" in string literal", error, ch1, ch2)); | ||||
|                         mLastChar = hexData & 0xFF; | ||||
|                     } | ||||
|                     else | ||||
|                         return reportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in string literal", ch1, ch2)); | ||||
|                 } | ||||
|                 else | ||||
|                     return reportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", mLastChar)); | ||||
|             } | ||||
|             appendCh(mState.StringLit, mLastChar); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     //identifier/keyword
 | ||||
|     if (isalpha(mLastChar) || mLastChar == '_') //[a-zA-Z_]
 | ||||
|     { | ||||
|         mState.IdentifierStr = mLastChar; | ||||
|         nextChar(); | ||||
|         while (isalnum(mLastChar) || mLastChar == '_') //[0-9a-zA-Z_]
 | ||||
|         { | ||||
|             appendCh(mState.IdentifierStr, mLastChar); | ||||
|             nextChar(); | ||||
|         } | ||||
| 
 | ||||
|         //keywords
 | ||||
|         auto found = mKeywordMap.find(mState.IdentifierStr); | ||||
|         if (found != mKeywordMap.end()) | ||||
|             return found->second; | ||||
| 
 | ||||
|         return tok_identifier; | ||||
|     } | ||||
| 
 | ||||
|     //hex numbers
 | ||||
|     if (mLastChar == '0' && peekChar() == 'x') //0x
 | ||||
|     { | ||||
|         nextChar(); //consume the 'x'
 | ||||
|         mNumStr.clear(); | ||||
| 
 | ||||
|         while (isxdigit(nextChar())) //[0-9a-fA-F]*
 | ||||
|             appendCh(mNumStr, mLastChar); | ||||
| 
 | ||||
|         if (!mNumStr.length()) //check for error condition
 | ||||
|             return reportError("no hex digits after \"0x\" prefix"); | ||||
| 
 | ||||
|         auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 16); | ||||
|         if (error) | ||||
|             return reportError(StringUtils::sprintf("convertNumber failed (%s) on hexadecimal number", error)); | ||||
|         mIsHexNumberVal = true; | ||||
|         return tok_number; | ||||
|     } | ||||
|     if (isdigit(mLastChar)) //[0-9]
 | ||||
|     { | ||||
|         mNumStr = mLastChar; | ||||
| 
 | ||||
|         while (isdigit(nextChar())) //[0-9]*
 | ||||
|             mNumStr += mLastChar; | ||||
| 
 | ||||
|         auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 10); | ||||
|         if (error) | ||||
|             return reportError(StringUtils::sprintf("convertNumber failed (%s) on decimal number", error)); | ||||
|         mIsHexNumberVal = false; | ||||
|         return tok_number; | ||||
|     } | ||||
| 
 | ||||
|     //comments
 | ||||
|     if (mLastChar == '/' && peekChar() == '/') //line comment
 | ||||
|     { | ||||
|         do | ||||
|         { | ||||
|             if (mLastChar == '\n') | ||||
|                 signalNewLine(); | ||||
|             nextChar(); | ||||
|         } while (!(mLastChar == EOF || mLastChar == '\n')); | ||||
| 
 | ||||
|         return getToken(); //interpret the next line
 | ||||
|     } | ||||
|     if (mLastChar == '/' && peekChar() == '*') //block comment
 | ||||
|     { | ||||
|         do | ||||
|         { | ||||
|             if (mLastChar == '\n') | ||||
|                 signalNewLine(); | ||||
|             nextChar(); | ||||
|         } while (!(mLastChar == EOF || mLastChar == '*' && peekChar() == '/')); | ||||
| 
 | ||||
|         if (mLastChar == EOF) //unexpected end of file
 | ||||
|         { | ||||
|             mState.LineIndex++; | ||||
|             return reportError("unexpected end of file in block comment"); | ||||
|         } | ||||
| 
 | ||||
|         nextChar(); | ||||
|         nextChar(); | ||||
|         return getToken(); //get the next non-comment token
 | ||||
|     } | ||||
| 
 | ||||
|     //operators
 | ||||
|     auto opFound = mOpTripleMap.find(MAKE_OP_TRIPLE(mLastChar, peekChar(), peekChar(1))); | ||||
|     if (opFound != mOpTripleMap.end()) | ||||
|     { | ||||
|         nextChar(); | ||||
|         nextChar(); | ||||
|         nextChar(); | ||||
|         return opFound->second; | ||||
|     } | ||||
|     opFound = mOpDoubleMap.find(MAKE_OP_DOUBLE(mLastChar, peekChar())); | ||||
|     if (opFound != mOpDoubleMap.end()) | ||||
|     { | ||||
|         nextChar(); | ||||
|         nextChar(); | ||||
|         return opFound->second; | ||||
|     } | ||||
|     opFound = mOpSingleMap.find(MAKE_OP_SINGLE(mLastChar)); | ||||
|     if (opFound != mOpSingleMap.end()) | ||||
|     { | ||||
|         nextChar(); | ||||
|         return opFound->second; | ||||
|     } | ||||
| 
 | ||||
|     //end of file
 | ||||
|     if (mLastChar == EOF) | ||||
|         return tok_eof; | ||||
| 
 | ||||
|     //unknown character
 | ||||
|     return reportError(StringUtils::sprintf("unexpected character \'%c\'", mLastChar)); | ||||
| } | ||||
| 
 | ||||
| Lexer::Token Lexer::reportError(const std::string & error) | ||||
| { | ||||
|     mError = error; | ||||
|     return tok_error; | ||||
| } | ||||
| 
 | ||||
| int Lexer::nextChar() | ||||
| { | ||||
|     return mLastChar = readChar(); | ||||
| } | ||||
| 
 | ||||
| void Lexer::reportWarning(const std::string & warning) | ||||
| { | ||||
|     mWarnings.push_back(warning); | ||||
| } | ||||
| 
 | ||||
| void Lexer::resetLexerState() | ||||
| { | ||||
|     mInput.clear(); | ||||
|     mInput.reserve(1024 * 1024); | ||||
|     mIndex = 0; | ||||
|     mError.clear(); | ||||
|     mWarnings.clear(); | ||||
|     clearReserve(mState.IdentifierStr); | ||||
|     mState.NumberVal = 0; | ||||
|     mIsHexNumberVal = false; | ||||
|     clearReserve(mState.StringLit); | ||||
|     clearReserve(mNumStr, 16); | ||||
|     mState.CharLit = '\0'; | ||||
|     mLastChar = ' '; | ||||
|     mState.CurLine = 0; | ||||
|     mState.LineIndex = 0; | ||||
| } | ||||
| 
 | ||||
| void Lexer::setupTokenMaps() | ||||
| { | ||||
|     //setup keyword map
 | ||||
| #define DEF_KEYWORD(keyword) mKeywordMap[#keyword] = tok_##keyword; | ||||
| #include "keywords.h" | ||||
| #undef DEF_KEYWORD | ||||
| 
 | ||||
|     //setup token maps
 | ||||
| #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) mOpTripleMap[MAKE_OP_TRIPLE(ch1, ch2, ch3)] = tok_##enumval; | ||||
| #define DEF_OP_DOUBLE(enumval, ch1, ch2) mOpDoubleMap[MAKE_OP_DOUBLE(ch1, ch2)] = tok_##enumval; | ||||
| #define DEF_OP_SINGLE(enumval, ch1) mOpSingleMap[MAKE_OP_SINGLE(ch1)] = tok_##enumval; | ||||
| #include "operators.h" | ||||
| #undef DEF_OP_TRIPLE | ||||
| #undef DEF_OP_DOUBLE | ||||
| #undef DEF_OP_SINGLE | ||||
| 
 | ||||
|     //setup reverse token maps
 | ||||
| #define DEF_KEYWORD(keyword) mReverseTokenMap[tok_##keyword] = #keyword; | ||||
| #include "keywords.h" | ||||
| #undef DEF_KEYWORD | ||||
| 
 | ||||
| #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) mReverseTokenMap[tok_##enumval] = std::string({ch1, ch2, ch3}); | ||||
| #define DEF_OP_DOUBLE(enumval, ch1, ch2) mReverseTokenMap[tok_##enumval] = std::string({ch1, ch2}); | ||||
| #define DEF_OP_SINGLE(enumval, ch1) mReverseTokenMap[tok_##enumval] = std::string({ch1}); | ||||
| #include "operators.h" | ||||
| #undef DEF_OP_TRIPLE | ||||
| #undef DEF_OP_DOUBLE | ||||
| #undef DEF_OP_SINGLE | ||||
| } | ||||
| 
 | ||||
| std::string Lexer::tokString(Token tok) | ||||
| { | ||||
|     switch (tok) | ||||
|     { | ||||
|     case tok_eof: return "tok_eof"; | ||||
|     case tok_error: return StringUtils::sprintf("error(line %d, col %d, \"%s\")", mState.CurLine + 1, mState.LineIndex, mError.c_str()); | ||||
|     case tok_identifier: return mState.IdentifierStr; | ||||
|     case tok_number: return StringUtils::sprintf(mIsHexNumberVal ? "0x%llX" : "%llu", mState.NumberVal); | ||||
|     case tok_stringlit: return StringUtils::sprintf("\"%s\"", StringUtils::Escape(mState.StringLit).c_str()); | ||||
|     case tok_charlit: | ||||
|         { | ||||
|             std::string s; | ||||
|             s = mState.CharLit; | ||||
|             return StringUtils::sprintf("'%s'", StringUtils::Escape(s).c_str()); | ||||
|         } | ||||
|     default: | ||||
|         { | ||||
|             auto found = mReverseTokenMap.find(Token(tok)); | ||||
|             if (found != mReverseTokenMap.end()) | ||||
|                 return found->second; | ||||
|             return "<UNKNOWN TOKEN>"; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| int Lexer::peekChar(size_t distance) | ||||
| { | ||||
|     if (mIndex + distance >= mInput.size()) | ||||
|         return EOF; | ||||
|     auto ch = mInput[mIndex + distance]; | ||||
|     if (ch == '\0') | ||||
|     { | ||||
|         reportWarning(StringUtils::sprintf("\\0 character in file data")); | ||||
|         return peekChar(distance + 1); | ||||
|     } | ||||
|     return ch; | ||||
| } | ||||
| 
 | ||||
| int Lexer::readChar() | ||||
| { | ||||
|     if (mIndex == mInput.size()) | ||||
|         return EOF; | ||||
|     auto ch = mInput[mIndex++]; | ||||
|     mState.LineIndex++; | ||||
|     if (ch == '\0') | ||||
|     { | ||||
|         reportWarning(StringUtils::sprintf("\\0 character in file data")); | ||||
|         return readChar(); | ||||
|     } | ||||
|     return ch; | ||||
| } | ||||
| 
 | ||||
| bool Lexer::checkString(const std::string & expected) | ||||
| { | ||||
|     for (size_t i = 0; i < expected.size(); i++) | ||||
|     { | ||||
|         auto ch = peekChar(i); | ||||
|         if (ch == EOF) | ||||
|             return false; | ||||
|         if (ch != uint8_t(expected[i])) | ||||
|             return false; | ||||
|     } | ||||
|     mIndex += expected.size(); | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| void Lexer::signalNewLine() | ||||
| { | ||||
|     mState.CurLine++; | ||||
|     mState.LineIndex = 0; | ||||
| } | ||||
|  | @ -0,0 +1,81 @@ | |||
| #pragma once | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <vector> | ||||
| #include <unordered_map> | ||||
| #include <functional> | ||||
| 
 | ||||
| class Lexer | ||||
| { | ||||
| public: | ||||
|     enum Token | ||||
|     { | ||||
|         //status tokens
 | ||||
|         tok_eof = -10000, | ||||
|         tok_error, | ||||
| 
 | ||||
|         //keywords
 | ||||
| #define DEF_KEYWORD(keyword) tok_##keyword, | ||||
| #include "keywords.h" | ||||
| #undef DEF_KEYWORD | ||||
| 
 | ||||
|         //others
 | ||||
|         tok_identifier, //[a-zA-Z_][a-zA-Z0-9_]
 | ||||
|         tok_number, //(0x[0-9a-fA-F]+)|([0-9]+)
 | ||||
|         tok_stringlit, //"([^\\"]|\\([\\"'?abfnrtv0]|x[0-9a-fA-f]{2}))*"
 | ||||
|         tok_charlit, //'([^\\]|\\([\\"'?abfnrtv0]|x[0-9a-fA-f]{2}))'
 | ||||
| 
 | ||||
|         //operators
 | ||||
| #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) tok_##enumval, | ||||
| #define DEF_OP_DOUBLE(enumval, ch1, ch2) tok_##enumval, | ||||
| #define DEF_OP_SINGLE(enumval, ch1) tok_##enumval, | ||||
| #include "operators.h" | ||||
| #undef DEF_OP_TRIPLE | ||||
| #undef DEF_OP_DOUBLE | ||||
| #undef DEF_OP_SINGLE | ||||
|     }; | ||||
| 
 | ||||
|     struct TokenState | ||||
|     { | ||||
|         Token Token; | ||||
|         std::string IdentifierStr; //tok_identifier
 | ||||
|         uint64_t NumberVal = 0; //tok_number
 | ||||
|         std::string StringLit; //tok_stringlit
 | ||||
|         char CharLit = '\0'; //tok_charlit
 | ||||
| 
 | ||||
|         size_t CurLine = 0; | ||||
|         size_t LineIndex = 0; | ||||
|     }; | ||||
| 
 | ||||
|     explicit Lexer(); | ||||
|     bool ReadInputFile(const std::string & filename); | ||||
|     bool DoLexing(std::vector<TokenState> & tokens, std::string & error); | ||||
|     bool Test(const std::function<void(const std::string & line)> & lexEnum, bool output = true); | ||||
| 
 | ||||
| private: | ||||
|     TokenState mState; | ||||
|     std::vector<std::string> mWarnings; | ||||
|     std::string mError; | ||||
|     std::vector<uint8_t> mInput; | ||||
|     size_t mIndex = 0; | ||||
|     bool mIsHexNumberVal = false; | ||||
|     std::string mNumStr; | ||||
|     int mLastChar = ' '; | ||||
|     std::unordered_map<std::string, Token> mKeywordMap; | ||||
|     std::unordered_map<Token, std::string> mReverseTokenMap; | ||||
|     std::unordered_map<int, Token> mOpTripleMap; | ||||
|     std::unordered_map<int, Token> mOpDoubleMap; | ||||
|     std::unordered_map<int, Token> mOpSingleMap; | ||||
| 
 | ||||
|     void resetLexerState(); | ||||
|     void setupTokenMaps(); | ||||
|     Token reportError(const std::string & error); | ||||
|     void reportWarning(const std::string & warning); | ||||
|     std::string tokString(Token tok); | ||||
|     int peekChar(size_t distance = 0); | ||||
|     int readChar(); | ||||
|     bool checkString(const std::string & expected); | ||||
|     int nextChar(); | ||||
|     void signalNewLine(); | ||||
|     Token getToken(); | ||||
| }; | ||||
|  | @ -1,567 +1,27 @@ | |||
| #include <windows.h> | ||||
| #include <stdio.h> | ||||
| #include <string> | ||||
| #include <stdint.h> | ||||
| #include <unordered_map> | ||||
| #include <functional> | ||||
| #include <vector> | ||||
| #include "filehelper.h" | ||||
| #include "stringutils.h" | ||||
| #include "testfiles.h" | ||||
| #include "lexer.h" | ||||
| #include "filehelper.h" | ||||
| 
 | ||||
//Pack up to three operator characters into one int key (first character in the low byte).
//Every parameter is parenthesized so that arguments containing lower-precedence operators
//are shifted as a whole instead of being split by the expansion.
#define MAKE_OP_TRIPLE(ch1, ch2, ch3) ((ch3) << 16 | (ch2) << 8 | (ch1))
#define MAKE_OP_DOUBLE(ch1, ch2) ((ch2) << 8 | (ch1))
#define MAKE_OP_SINGLE(ch1) (ch1)
| 
 | ||||
| #define DEFAULT_STRING_BUFFER 65536 | ||||
| 
 | ||||
| using namespace std; | ||||
| 
 | ||||
| struct Lexer | ||||
| { | ||||
|     explicit Lexer() | ||||
|     { | ||||
|         SetupTokenMaps(); | ||||
|     } | ||||
| 
 | ||||
|     enum Token | ||||
|     { | ||||
|         //status tokens
 | ||||
|         tok_eof = -10000, | ||||
|         tok_error, | ||||
| 
 | ||||
|         //keywords
 | ||||
| #define DEF_KEYWORD(keyword) tok_##keyword, | ||||
| #include "keywords.h" | ||||
| #undef DEF_KEYWORD | ||||
| 
 | ||||
|         //others
 | ||||
|         tok_identifier, //[a-zA-Z_][a-zA-Z0-9_]
 | ||||
|         tok_number, //(0x[0-9a-fA-F]+)|([0-9]+)
 | ||||
|         tok_stringlit, //"([^\\"]|\\([\\"'?abfnrtv0]|x[0-9a-fA-f]{2}))*"
 | ||||
|         tok_charlit, //'([^\\]|\\([\\"'?abfnrtv0]|x[0-9a-fA-f]{2}))'
 | ||||
| 
 | ||||
|         //operators
 | ||||
| #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) tok_##enumval, | ||||
| #define DEF_OP_DOUBLE(enumval, ch1, ch2) tok_##enumval, | ||||
| #define DEF_OP_SINGLE(enumval, ch1) tok_##enumval, | ||||
| #include "operators.h" | ||||
| #undef DEF_OP_TRIPLE | ||||
| #undef DEF_OP_DOUBLE | ||||
| #undef DEF_OP_SINGLE | ||||
|     }; | ||||
| 
 | ||||
|     vector<uint8_t> Input; | ||||
|     size_t Index = 0; | ||||
|     string Error; | ||||
|     vector<String> Warnings; | ||||
| 
 | ||||
|     //lexer state
 | ||||
|     string IdentifierStr; | ||||
|     uint64_t NumberVal = 0; | ||||
|     bool IsHexNumberVal = false; | ||||
|     string StringLit; | ||||
|     string NumStr; | ||||
|     char CharLit = '\0'; | ||||
|     int LastChar = ' '; | ||||
|     size_t CurLine = 0; | ||||
|     size_t LineIndex = 0; | ||||
| 
 | ||||
|     static void clearReserve(string & str, size_t reserve = DEFAULT_STRING_BUFFER) | ||||
|     { | ||||
|         str.clear(); | ||||
|         str.reserve(reserve); | ||||
|     } | ||||
| 
 | ||||
|     static void appendCh(string & str, char ch) | ||||
|     { | ||||
|         str.resize(str.size() + 1); | ||||
|         str[str.size() - 1] = ch; | ||||
|     } | ||||
| 
 | ||||
|     void ResetLexerState() | ||||
|     { | ||||
|         Input.clear(); | ||||
|         Input.reserve(1024 * 1024); | ||||
|         Index = 0; | ||||
|         Error.clear(); | ||||
|         Warnings.clear(); | ||||
|         clearReserve(IdentifierStr); | ||||
|         NumberVal = 0; | ||||
|         IsHexNumberVal = false; | ||||
|         clearReserve(StringLit); | ||||
|         clearReserve(NumStr, 16); | ||||
|         CharLit = '\0'; | ||||
|         LastChar = ' '; | ||||
|         CurLine = 0; | ||||
|         LineIndex = 0; | ||||
|     } | ||||
| 
 | ||||
|     unordered_map<string, Token> KeywordMap; | ||||
|     unordered_map<Token, string> ReverseTokenMap; | ||||
|     unordered_map<int, Token> OpTripleMap; | ||||
|     unordered_map<int, Token> OpDoubleMap; | ||||
|     unordered_map<int, Token> OpSingleMap; | ||||
| 
 | ||||
|     void SetupTokenMaps() | ||||
|     { | ||||
|         //setup keyword map
 | ||||
| #define DEF_KEYWORD(keyword) KeywordMap[#keyword] = tok_##keyword; | ||||
| #include "keywords.h" | ||||
| #undef DEF_KEYWORD | ||||
| 
 | ||||
|         //setup token maps
 | ||||
| #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) OpTripleMap[MAKE_OP_TRIPLE(ch1, ch2, ch3)] = tok_##enumval; | ||||
| #define DEF_OP_DOUBLE(enumval, ch1, ch2) OpDoubleMap[MAKE_OP_DOUBLE(ch1, ch2)] = tok_##enumval; | ||||
| #define DEF_OP_SINGLE(enumval, ch1) OpSingleMap[MAKE_OP_SINGLE(ch1)] = tok_##enumval; | ||||
| #include "operators.h" | ||||
| #undef DEF_OP_TRIPLE | ||||
| #undef DEF_OP_DOUBLE | ||||
| #undef DEF_OP_SINGLE | ||||
| 
 | ||||
|         //setup reverse token maps
 | ||||
| #define DEF_KEYWORD(keyword) ReverseTokenMap[tok_##keyword] = #keyword; | ||||
| #include "keywords.h" | ||||
| #undef DEF_KEYWORD | ||||
| 
 | ||||
| #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) ReverseTokenMap[tok_##enumval] = string({ch1, ch2, ch3}); | ||||
| #define DEF_OP_DOUBLE(enumval, ch1, ch2) ReverseTokenMap[tok_##enumval] = string({ch1, ch2}); | ||||
| #define DEF_OP_SINGLE(enumval, ch1) ReverseTokenMap[tok_##enumval] = string({ch1}); | ||||
| #include "operators.h" | ||||
| #undef DEF_OP_TRIPLE | ||||
| #undef DEF_OP_DOUBLE | ||||
| #undef DEF_OP_SINGLE | ||||
|     } | ||||
| 
 | ||||
|     Token ReportError(const String & error) | ||||
|     { | ||||
|         Error = error; | ||||
|         return tok_error; | ||||
|     } | ||||
| 
 | ||||
|     void ReportWarning(const String & warning) | ||||
|     { | ||||
|         Warnings.push_back(warning); | ||||
|     } | ||||
| 
 | ||||
|     String TokString(int tok) | ||||
|     { | ||||
|         switch (Token(tok)) | ||||
|         { | ||||
|         case tok_eof: return "tok_eof"; | ||||
|         case tok_error: return StringUtils::sprintf("error(line %d, col %d, \"%s\")", CurLine + 1, LineIndex, Error.c_str()); | ||||
|         case tok_identifier: return IdentifierStr; | ||||
|         case tok_number: return StringUtils::sprintf(IsHexNumberVal ? "0x%llX" : "%llu", NumberVal); | ||||
|         case tok_stringlit: return StringUtils::sprintf("\"%s\"", StringUtils::Escape(StringLit).c_str()); | ||||
|         case tok_charlit: | ||||
|         { | ||||
|             String s; | ||||
|             s = CharLit; | ||||
|             return StringUtils::sprintf("'%s'", StringUtils::Escape(s).c_str()); | ||||
|         } | ||||
|         default: | ||||
|         { | ||||
|             auto found = ReverseTokenMap.find(Token(tok)); | ||||
|             if (found != ReverseTokenMap.end()) | ||||
|                 return found->second; | ||||
|             return "<INVALID TOKEN>"; | ||||
|         } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     int PeekChar(size_t distance = 0) | ||||
|     { | ||||
|         if (Index + distance >= Input.size()) | ||||
|             return EOF; | ||||
|         auto ch = Input[Index + distance]; | ||||
|         if (ch == '\0') | ||||
|         { | ||||
|             ReportWarning(StringUtils::sprintf("\\0 character in file data")); | ||||
|             return PeekChar(distance + 1); | ||||
|         } | ||||
|         return ch; | ||||
|     } | ||||
| 
 | ||||
|     int ReadChar() | ||||
|     { | ||||
|         if (Index == Input.size()) | ||||
|             return EOF; | ||||
|         auto ch = Input[Index++]; | ||||
|         LineIndex++; | ||||
|         if (ch == '\0') | ||||
|         { | ||||
|             ReportWarning(StringUtils::sprintf("\\0 character in file data")); | ||||
|             return ReadChar(); | ||||
|         } | ||||
|         return ch; | ||||
|     } | ||||
| 
 | ||||
|     bool CheckString(const string & expected) | ||||
|     { | ||||
|         for (size_t i = 0; i < expected.size(); i++) | ||||
|         { | ||||
|             auto ch = PeekChar(i); | ||||
|             if (ch == EOF) | ||||
|                 return false; | ||||
|             if (ch != uint8_t(expected[i])) | ||||
|                 return false; | ||||
|         } | ||||
|         Index += expected.size(); | ||||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     int NextChar() | ||||
|     { | ||||
|         return LastChar = ReadChar(); | ||||
|     } | ||||
| 
 | ||||
|     void SignalNextLine() | ||||
|     { | ||||
|         CurLine++; | ||||
|         LineIndex = 0; | ||||
|     } | ||||
| 
 | ||||
//Converts the digit string `str` to an unsigned 64-bit number in the given radix.
//str:    null-terminated text that must consist of digits only (no sign, no leading whitespace).
//result: receives the converted value (0 when the text is not a number).
//radix:  numeric base handed to strtoull.
//Returns nullptr on success, otherwise a static string describing why the conversion failed.
static const char* ConvertNumber(const char* str, uint64_t & result, int radix)
{
    //strtoull skips leading whitespace and accepts a sign ("-1" wraps around to ULLONG_MAX
    //without reporting an error), neither of which is a valid unsigned number here
    const unsigned char first = static_cast<unsigned char>(*str);
    if (isspace(first) || first == '+' || first == '-')
    {
        result = 0;
        return "not a number";
    }

    errno = 0;
    char* end = nullptr;
    result = strtoull(str, &end, radix);
    if (end == str) //nothing was consumed
        return "not a number";
    if (result == ULLONG_MAX && errno == ERANGE) //overflow is signalled by ULLONG_MAX + ERANGE
        return "does not fit";
    if (*end) //trailing characters that are not digits in this radix
        return "str not completely consumed";
    return nullptr;
}
| 
 | ||||
|     int GetToken() | ||||
|     { | ||||
|         //skip whitespace
 | ||||
|         while (isspace(LastChar)) | ||||
|         { | ||||
|             if (LastChar == '\n') | ||||
|                 SignalNextLine(); | ||||
|             NextChar(); | ||||
|         } | ||||
| 
 | ||||
|         //skip \\[\r\n]
 | ||||
|         if (LastChar == '\\' && (PeekChar() == '\r' || PeekChar() == '\n')) | ||||
|         { | ||||
|             NextChar(); | ||||
|             return GetToken(); | ||||
|         } | ||||
| 
 | ||||
|         //character literal
 | ||||
|         if (LastChar == '\'') | ||||
|         { | ||||
|             string charLit; | ||||
|             while (true) | ||||
|             { | ||||
|                 NextChar(); | ||||
|                 if (LastChar == EOF) //end of file
 | ||||
|                     return ReportError("unexpected end of file in character literal (1)"); | ||||
|                 if (LastChar == '\r' || LastChar == '\n') | ||||
|                     return ReportError("unexpected newline in character literal (1)"); | ||||
|                 if (LastChar == '\'') //end of character literal
 | ||||
|                 { | ||||
|                     if (charLit.length() != 1) | ||||
|                         return ReportError(StringUtils::sprintf("invalid character literal '%s'", charLit.c_str())); | ||||
|                     CharLit = charLit[0]; | ||||
|                     NextChar(); | ||||
|                     return tok_charlit; | ||||
|                 } | ||||
|                 if (LastChar == '\\') //escape sequence
 | ||||
|                 { | ||||
|                     NextChar(); | ||||
|                     if (LastChar == EOF) | ||||
|                         return ReportError("unexpected end of file in character literal (2)"); | ||||
|                     if (LastChar == '\r' || LastChar == '\n') | ||||
|                         return ReportError("unexpected newline in character literal (2)"); | ||||
|                     if (LastChar == '\'' || LastChar == '\"' || LastChar == '?' || LastChar == '\\') | ||||
|                         LastChar = LastChar; | ||||
|                     else if (LastChar == 'a') | ||||
|                         LastChar = '\a'; | ||||
|                     else if (LastChar == 'b') | ||||
|                         LastChar = '\b'; | ||||
|                     else if (LastChar == 'f') | ||||
|                         LastChar = '\f'; | ||||
|                     else if (LastChar == 'n') | ||||
|                         LastChar = '\n'; | ||||
|                     else if (LastChar == 'r') | ||||
|                         LastChar = '\r'; | ||||
|                     else if (LastChar == 't') | ||||
|                         LastChar = '\t'; | ||||
|                     else if (LastChar == 'v') | ||||
|                         LastChar = '\v'; | ||||
|                     else if (LastChar == '0') | ||||
|                         LastChar = '\0'; | ||||
|                     else if (LastChar == 'x') //\xHH
 | ||||
|                     { | ||||
|                         auto ch1 = NextChar(); | ||||
|                         auto ch2 = NextChar(); | ||||
|                         if (isxdigit(ch1) && isxdigit(ch2)) | ||||
|                         { | ||||
|                             char byteStr[3] = ""; | ||||
|                             byteStr[0] = ch1; | ||||
|                             byteStr[1] = ch2; | ||||
|                             uint64_t hexData; | ||||
|                             auto error = ConvertNumber(byteStr, hexData, 16); | ||||
|                             if (error) | ||||
|                                 return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in character literal", error, ch1, ch2)); | ||||
|                             LastChar = hexData & 0xFF; | ||||
|                         } | ||||
|                         else | ||||
|                             return ReportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in character literal", ch1, ch2)); | ||||
|                     } | ||||
|                     else | ||||
|                         return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in character literal", LastChar)); | ||||
|                 } | ||||
|                 charLit += LastChar; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         //string literal
 | ||||
|         if (LastChar == '\"') | ||||
|         { | ||||
|             StringLit.clear(); | ||||
|             while (true) | ||||
|             { | ||||
|                 NextChar(); | ||||
|                 if (LastChar == EOF) //end of file
 | ||||
|                     return ReportError("unexpected end of file in string literal (1)"); | ||||
|                 if (LastChar == '\r' || LastChar == '\n') | ||||
|                     return ReportError("unexpected newline in string literal (1)"); | ||||
|                 if (LastChar == '\"') //end of string literal
 | ||||
|                 { | ||||
|                     NextChar(); | ||||
|                     return tok_stringlit; | ||||
|                 } | ||||
|                 if (LastChar == '\\') //escape sequence
 | ||||
|                 { | ||||
|                     NextChar(); | ||||
|                     if (LastChar == EOF) | ||||
|                         return ReportError("unexpected end of file in string literal (2)"); | ||||
|                     if (LastChar == '\r' || LastChar == '\n') | ||||
|                         return ReportError("unexpected newline in string literal (2)"); | ||||
|                     if (LastChar == '\'' || LastChar == '\"' || LastChar == '?' || LastChar == '\\') | ||||
|                         LastChar = LastChar; | ||||
|                     else if (LastChar == 'a') | ||||
|                         LastChar = '\a'; | ||||
|                     else if (LastChar == 'b') | ||||
|                         LastChar = '\b'; | ||||
|                     else if (LastChar == 'f') | ||||
|                         LastChar = '\f'; | ||||
|                     else if (LastChar == 'n') | ||||
|                         LastChar = '\n'; | ||||
|                     else if (LastChar == 'r') | ||||
|                         LastChar = '\r'; | ||||
|                     else if (LastChar == 't') | ||||
|                         LastChar = '\t'; | ||||
|                     else if (LastChar == 'v') | ||||
|                         LastChar = '\v'; | ||||
|                     else if (LastChar == '0') | ||||
|                         LastChar = '\0'; | ||||
|                     else if (LastChar == 'x') //\xHH
 | ||||
|                     { | ||||
|                         auto ch1 = NextChar(); | ||||
|                         auto ch2 = NextChar(); | ||||
|                         if (isxdigit(ch1) && isxdigit(ch2)) | ||||
|                         { | ||||
|                             char byteStr[3] = ""; | ||||
|                             byteStr[0] = ch1; | ||||
|                             byteStr[1] = ch2; | ||||
|                             uint64_t hexData; | ||||
|                             auto error = ConvertNumber(byteStr, hexData, 16); | ||||
|                             if (error) | ||||
|                                 return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in string literal", error, ch1, ch2)); | ||||
|                             LastChar = hexData & 0xFF; | ||||
|                         } | ||||
|                         else | ||||
|                             return ReportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in string literal", ch1, ch2)); | ||||
|                     } | ||||
|                     else | ||||
|                         return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar)); | ||||
|                 } | ||||
|                 appendCh(StringLit, LastChar); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         //identifier/keyword
 | ||||
|         if (isalpha(LastChar) || LastChar == '_') //[a-zA-Z_]
 | ||||
|         { | ||||
|             IdentifierStr = LastChar; | ||||
|             NextChar(); | ||||
|             while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_]
 | ||||
|             { | ||||
|                 appendCh(IdentifierStr, LastChar); | ||||
|                 NextChar(); | ||||
|             } | ||||
| 
 | ||||
|             //keywords
 | ||||
|             auto found = KeywordMap.find(IdentifierStr); | ||||
|             if (found != KeywordMap.end()) | ||||
|                 return found->second; | ||||
| 
 | ||||
|             return tok_identifier; | ||||
|         } | ||||
| 
 | ||||
|         //hex numbers
 | ||||
|         if (LastChar == '0' && PeekChar() == 'x') //0x
 | ||||
|         { | ||||
|             NextChar(); //consume the 'x'
 | ||||
|             NumStr.clear(); | ||||
| 
 | ||||
|             while (isxdigit(NextChar())) //[0-9a-fA-F]*
 | ||||
|                 appendCh(NumStr, LastChar); | ||||
| 
 | ||||
|             if (!NumStr.length()) //check for error condition
 | ||||
|                 return ReportError("no hex digits after \"0x\" prefix"); | ||||
| 
 | ||||
|             auto error = ConvertNumber(NumStr.c_str(), NumberVal, 16); | ||||
|             if (error) | ||||
|                 return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on hexadecimal number", error)); | ||||
|             IsHexNumberVal = true; | ||||
|             return tok_number; | ||||
|         } | ||||
|         if (isdigit(LastChar)) //[0-9]
 | ||||
|         { | ||||
|             NumStr = LastChar; | ||||
| 
 | ||||
|             while (isdigit(NextChar())) //[0-9]*
 | ||||
|                 NumStr += LastChar; | ||||
| 
 | ||||
|             auto error = ConvertNumber(NumStr.c_str(), NumberVal, 10); | ||||
|             if (error) | ||||
|                 return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on decimal number", error)); | ||||
|             IsHexNumberVal = false; | ||||
|             return tok_number; | ||||
|         } | ||||
| 
 | ||||
|         //comments
 | ||||
|         if (LastChar == '/' && PeekChar() == '/') //line comment
 | ||||
|         { | ||||
|             do | ||||
|             { | ||||
|                 if (LastChar == '\n') | ||||
|                     SignalNextLine(); | ||||
|                 NextChar(); | ||||
|             } while (!(LastChar == EOF || LastChar == '\n')); | ||||
| 
 | ||||
|             return GetToken(); //interpret the next line
 | ||||
|         } | ||||
|         if (LastChar == '/' && PeekChar() == '*') //block comment
 | ||||
|         { | ||||
|             do | ||||
|             { | ||||
|                 if (LastChar == '\n') | ||||
|                     SignalNextLine(); | ||||
|                 NextChar(); | ||||
|             } while (!(LastChar == EOF || LastChar == '*' && PeekChar() == '/')); | ||||
| 
 | ||||
|             if (LastChar == EOF) //unexpected end of file
 | ||||
|             { | ||||
|                 LineIndex++; | ||||
|                 return ReportError("unexpected end of file in block comment"); | ||||
|             } | ||||
| 
 | ||||
|             NextChar(); | ||||
|             NextChar(); | ||||
|             return GetToken(); //get the next non-comment token
 | ||||
|         } | ||||
| 
 | ||||
|         //operators
 | ||||
|         auto opFound = OpTripleMap.find(MAKE_OP_TRIPLE(LastChar, PeekChar(), PeekChar(1))); | ||||
|         if (opFound != OpTripleMap.end()) | ||||
|         { | ||||
|             NextChar(); | ||||
|             NextChar(); | ||||
|             NextChar(); | ||||
|             return opFound->second; | ||||
|         } | ||||
|         opFound = OpDoubleMap.find(MAKE_OP_DOUBLE(LastChar, PeekChar())); | ||||
|         if (opFound != OpDoubleMap.end()) | ||||
|         { | ||||
|             NextChar(); | ||||
|             NextChar(); | ||||
|             return opFound->second; | ||||
|         } | ||||
|         opFound = OpSingleMap.find(MAKE_OP_SINGLE(LastChar)); | ||||
|         if (opFound != OpSingleMap.end()) | ||||
|         { | ||||
|             NextChar(); | ||||
|             return opFound->second; | ||||
|         } | ||||
| 
 | ||||
|         //end of file
 | ||||
|         if (LastChar == EOF) | ||||
|             return tok_eof; | ||||
| 
 | ||||
|         //unknown character
 | ||||
|         return ReportError(StringUtils::sprintf("unexpected character \'%c\'", LastChar)); | ||||
|     } | ||||
| 
 | ||||
|     bool ReadInputFile(const string & filename) | ||||
|     { | ||||
|         ResetLexerState(); | ||||
|         return FileHelper::ReadAllData(filename, Input); | ||||
|     } | ||||
| 
 | ||||
|     bool TestLex(const function<void(const string & line)> & lexEnum, bool output = true) | ||||
|     { | ||||
|         size_t line = 0; | ||||
|         if (output) | ||||
|             lexEnum("1: "); | ||||
|         int tok; | ||||
|         string toks; | ||||
|         clearReserve(toks); | ||||
|         char newlineText[128] = ""; | ||||
|         do | ||||
|         { | ||||
|             tok = GetToken(); | ||||
|             if (!output) | ||||
|                 continue; | ||||
|             toks.clear(); | ||||
|             while (line < CurLine) | ||||
|             { | ||||
|                 line++; | ||||
|                 sprintf_s(newlineText, "\n%d: ", line + 1); | ||||
|                 toks.append(newlineText); | ||||
|             } | ||||
|             toks.append(TokString(tok)); | ||||
|             appendCh(toks, ' '); | ||||
|             lexEnum(toks); | ||||
|         } while (tok != tok_eof && tok != tok_error); | ||||
|         if (tok != tok_error && tok != tok_eof) | ||||
|             tok = ReportError("lexer did not finish at the end of the file"); | ||||
|         for (const auto & warning : Warnings) | ||||
|             if (output) | ||||
|                 lexEnum("\nwarning: " + warning); | ||||
|         return tok != tok_error; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| bool TestLexer(Lexer & lexer, const string & filename) | ||||
| bool TestLexer(Lexer & lexer, const std::string & filename) | ||||
| { | ||||
|     if (!lexer.ReadInputFile("tests\\" + filename)) | ||||
|     { | ||||
|         printf("failed to read \"%s\"\n", filename.c_str()); | ||||
|         return false; | ||||
|     } | ||||
|     string actual; | ||||
|     Lexer::clearReserve(actual); | ||||
|     auto success = lexer.TestLex([&](const string & line) | ||||
|     std::string actual; | ||||
|     actual.reserve(65536); | ||||
|     auto success = lexer.Test([&](const std::string & line) | ||||
|     { | ||||
|         actual.append(line); | ||||
|     }); | ||||
|     string expected; | ||||
|     if (FileHelper::ReadAllText("tests\\exp_lex\\" + filename, expected)) | ||||
|     std::string expected; | ||||
|     if (FileHelper::ReadAllText("tests\\exp_lex\\" + filename, expected) && expected == actual) | ||||
|     { | ||||
|         if (expected == actual) | ||||
|         { | ||||
|             printf("lexer test for \"%s\" success!\n", filename.c_str()); | ||||
|             return true; | ||||
|         } | ||||
|         printf("lexer test for \"%s\" success!\n", filename.c_str()); | ||||
|         return true; | ||||
|     } | ||||
|     if (success) | ||||
|         return true; | ||||
|  | @ -571,14 +31,14 @@ bool TestLexer(Lexer & lexer, const string & filename) | |||
|     return false; | ||||
| } | ||||
| 
 | ||||
| bool DebugLexer(Lexer & lexer, const string & filename, bool output) | ||||
| bool DebugLexer(Lexer & lexer, const std::string & filename, bool output) | ||||
| { | ||||
|     if (!lexer.ReadInputFile("tests\\" + filename)) | ||||
|     { | ||||
|         printf("failed to read \"%s\"\n", filename.c_str()); | ||||
|         return false; | ||||
|     } | ||||
|     auto success = lexer.TestLex([](const string & line) | ||||
|     auto success = lexer.Test([](const std::string & line) | ||||
|     { | ||||
|         printf("%s", line.c_str()); | ||||
|     }, output); | ||||
|  | @ -587,16 +47,16 @@ bool DebugLexer(Lexer & lexer, const string & filename, bool output) | |||
|     return success; | ||||
| } | ||||
| 
 | ||||
| void GenerateExpected(Lexer & lexer, const string & filename) | ||||
| void GenerateExpected(Lexer & lexer, const std::string & filename) | ||||
| { | ||||
|     if (!lexer.ReadInputFile("tests\\" + filename)) | ||||
|     { | ||||
|         printf("failed to read \"%s\"\n", filename.c_str()); | ||||
|         return; | ||||
|     } | ||||
|     string actual; | ||||
|     Lexer::clearReserve(actual); | ||||
|     lexer.TestLex([&](const string & line) | ||||
|     std::string actual; | ||||
|     actual.reserve(65536); | ||||
|     lexer.Test([&](const std::string & line) | ||||
|     { | ||||
|         actual.append(line); | ||||
|     }); | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue