mirror of https://github.com/x64dbg/btparser
				
				
				
			performance optimizations
This commit is contained in:
		
							parent
							
								
									2206f556c3
								
							
						
					
					
						commit
						a05ba4e8b7
					
				|  | @ -14,12 +14,9 @@ bool FileHelper::ReadAllData(const String & fileName, std::vector<unsigned char> | ||||||
|         content.clear(); |         content.clear(); | ||||||
|         return true; |         return true; | ||||||
|     } |     } | ||||||
|     Memory<char*> filedata(filesize + 1, "FileReader::ReadAllData:filedata"); |     content.resize(filesize); | ||||||
|     DWORD read = 0; |     DWORD read = 0; | ||||||
|     if(!ReadFile(hFile, filedata(), filesize, &read, nullptr)) |     return !!ReadFile(hFile, content.data(), filesize, &read, nullptr); | ||||||
|         return false; |  | ||||||
|     content = std::vector<unsigned char>(filedata(), filedata() + filesize); |  | ||||||
|     return true; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool FileHelper::WriteAllData(const String & fileName, const void* data, size_t size) | bool FileHelper::WriteAllData(const String & fileName, const void* data, size_t size) | ||||||
|  |  | ||||||
							
								
								
									
										206
									
								
								cparser/main.cpp
								
								
								
								
							
							
						
						
									
										206
									
								
								cparser/main.cpp
								
								
								
								
							|  | @ -14,6 +14,8 @@ | ||||||
| #define MAKE_OP_DOUBLE(ch1, ch2) (ch2 << 8 | ch1) | #define MAKE_OP_DOUBLE(ch1, ch2) (ch2 << 8 | ch1) | ||||||
| #define MAKE_OP_SINGLE(ch1) (ch1) | #define MAKE_OP_SINGLE(ch1) (ch1) | ||||||
| 
 | 
 | ||||||
|  | #define DEFAULT_STRING_BUFFER 65536 | ||||||
|  | 
 | ||||||
| using namespace std; | using namespace std; | ||||||
| 
 | 
 | ||||||
| struct Lexer | struct Lexer | ||||||
|  | @ -51,7 +53,6 @@ struct Lexer | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     vector<uint8_t> Input; |     vector<uint8_t> Input; | ||||||
|     string ConsumedInput; |  | ||||||
|     size_t Index = 0; |     size_t Index = 0; | ||||||
|     string Error; |     string Error; | ||||||
|     vector<String> Warnings; |     vector<String> Warnings; | ||||||
|  | @ -60,19 +61,34 @@ struct Lexer | ||||||
|     string IdentifierStr; |     string IdentifierStr; | ||||||
|     uint64_t NumberVal = 0; |     uint64_t NumberVal = 0; | ||||||
|     string StringLit; |     string StringLit; | ||||||
|  |     string NumStr; | ||||||
|     char CharLit = '\0'; |     char CharLit = '\0'; | ||||||
|     int LastChar = ' '; |     int LastChar = ' '; | ||||||
|     int CurLine = 0; |     int CurLine = 0; | ||||||
| 
 | 
 | ||||||
|  |     static void clearReserve(string & str, size_t reserve = DEFAULT_STRING_BUFFER) | ||||||
|  |     { | ||||||
|  |         str.clear(); | ||||||
|  |         str.reserve(reserve); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     static void appendCh(string & str, char ch) | ||||||
|  |     { | ||||||
|  |         str.resize(str.size() + 1); | ||||||
|  |         str[str.size() - 1] = ch; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     void ResetLexerState() |     void ResetLexerState() | ||||||
|     { |     { | ||||||
|         Input.clear(); |         Input.clear(); | ||||||
|         ConsumedInput.clear(); |         Input.reserve(1024 * 1024); | ||||||
|         Index = 0; |         Index = 0; | ||||||
|         Error.clear(); |         Error.clear(); | ||||||
|         IdentifierStr.clear(); |         Warnings.clear(); | ||||||
|  |         clearReserve(IdentifierStr); | ||||||
|         NumberVal = 0; |         NumberVal = 0; | ||||||
|         StringLit.clear(); |         clearReserve(StringLit); | ||||||
|  |         clearReserve(NumStr, 16); | ||||||
|         CharLit = '\0'; |         CharLit = '\0'; | ||||||
|         LastChar = ' '; |         LastChar = ' '; | ||||||
|         CurLine = 0; |         CurLine = 0; | ||||||
|  | @ -173,7 +189,6 @@ struct Lexer | ||||||
|             ReportWarning(StringUtils::sprintf("\\0 character in file data")); |             ReportWarning(StringUtils::sprintf("\\0 character in file data")); | ||||||
|             return ReadChar(); |             return ReadChar(); | ||||||
|         } |         } | ||||||
|         ConsumedInput += ch; |  | ||||||
|         return ch; |         return ch; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -191,11 +206,23 @@ struct Lexer | ||||||
|         return true; |         return true; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     int NextChar(int count = 1) |     int NextChar() | ||||||
|     { |     { | ||||||
|         for (auto i = 0; i < count; i++) |         return LastChar = ReadChar(); | ||||||
|             LastChar = ReadChar(); |     } | ||||||
|         return LastChar; | 
 | ||||||
|  |     static const char* ConvertNumber(const char* str, uint64_t & result, int radix) | ||||||
|  |     { | ||||||
|  |         errno = 0; | ||||||
|  |         char* end; | ||||||
|  |         result = strtoull(str, &end, radix); | ||||||
|  |         if (!result && end == str) | ||||||
|  |             return "not a number"; | ||||||
|  |         if (result == ULLONG_MAX && errno) | ||||||
|  |             return "does not fit"; | ||||||
|  |         if (*end) | ||||||
|  |             return "str not completely consumed"; | ||||||
|  |         return nullptr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     int GetToken() |     int GetToken() | ||||||
|  | @ -265,9 +292,10 @@ struct Lexer | ||||||
|                             char byteStr[3] = ""; |                             char byteStr[3] = ""; | ||||||
|                             byteStr[0] = ch1; |                             byteStr[0] = ch1; | ||||||
|                             byteStr[1] = ch2; |                             byteStr[1] = ch2; | ||||||
|                             unsigned int hexData; |                             uint64_t hexData; | ||||||
|                             if (sscanf_s(byteStr, "%X", &hexData) != 1) |                             auto error = ConvertNumber(byteStr, hexData, 16); | ||||||
|                                 return ReportError(StringUtils::sprintf("sscanf_s failed for hex sequence \"\\x%c%c\" in character literal", ch1, ch2)); |                             if (error) | ||||||
|  |                                 return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in character literal", error, ch1, ch2)); | ||||||
|                             LastChar = hexData & 0xFF; |                             LastChar = hexData & 0xFF; | ||||||
|                         } |                         } | ||||||
|                         else |                         else | ||||||
|  | @ -330,9 +358,10 @@ struct Lexer | ||||||
|                             char byteStr[3] = ""; |                             char byteStr[3] = ""; | ||||||
|                             byteStr[0] = ch1; |                             byteStr[0] = ch1; | ||||||
|                             byteStr[1] = ch2; |                             byteStr[1] = ch2; | ||||||
|                             unsigned int hexData; |                             uint64_t hexData; | ||||||
|                             if (sscanf_s(byteStr, "%X", &hexData) != 1) |                             auto error = ConvertNumber(byteStr, hexData, 16); | ||||||
|                                 return ReportError(StringUtils::sprintf("sscanf_s failed for hex sequence \"\\x%c%c\" in string literal", ch1, ch2)); |                             if (error) | ||||||
|  |                                 return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in string literal", error, ch1, ch2)); | ||||||
|                             LastChar = hexData & 0xFF; |                             LastChar = hexData & 0xFF; | ||||||
|                         } |                         } | ||||||
|                         else |                         else | ||||||
|  | @ -341,7 +370,7 @@ struct Lexer | ||||||
|                     else |                     else | ||||||
|                         return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar)); |                         return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar)); | ||||||
|                 } |                 } | ||||||
|                 StringLit.push_back(LastChar); |                 appendCh(StringLit, LastChar); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -352,7 +381,7 @@ struct Lexer | ||||||
|             NextChar(); |             NextChar(); | ||||||
|             while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_]
 |             while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_]
 | ||||||
|             { |             { | ||||||
|                 IdentifierStr += LastChar; |                 appendCh(IdentifierStr, LastChar); | ||||||
|                 NextChar(); |                 NextChar(); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|  | @ -367,29 +396,30 @@ struct Lexer | ||||||
|         //hex numbers
 |         //hex numbers
 | ||||||
|         if (LastChar == '0' && PeekChar() == 'x') //0x
 |         if (LastChar == '0' && PeekChar() == 'x') //0x
 | ||||||
|         { |         { | ||||||
|             string NumStr; |  | ||||||
|             ReadChar(); //consume the 'x'
 |             ReadChar(); //consume the 'x'
 | ||||||
|  |             NumStr.clear(); | ||||||
| 
 | 
 | ||||||
|             while (isxdigit(NextChar())) //[0-9a-fA-F]*
 |             while (isxdigit(NextChar())) //[0-9a-fA-F]*
 | ||||||
|                 NumStr += LastChar; |                 appendCh(NumStr, LastChar); | ||||||
| 
 | 
 | ||||||
|             if (!NumStr.length()) //check for error condition
 |             if (!NumStr.length()) //check for error condition
 | ||||||
|                 return ReportError("no hex digits after \"0x\" prefix"); |                 return ReportError("no hex digits after \"0x\" prefix"); | ||||||
| 
 | 
 | ||||||
|             if (sscanf_s(NumStr.c_str(), "%llX", &NumberVal) != 1) |             auto error = ConvertNumber(NumStr.c_str(), NumberVal, 16); | ||||||
|                 return ReportError("sscanf_s failed on hexadecimal number"); |             if (error) | ||||||
|  |                 return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on hexadecimal number", error)); | ||||||
|             return tok_number; |             return tok_number; | ||||||
|         } |         } | ||||||
|         if (isdigit(LastChar)) //[0-9]
 |         if (isdigit(LastChar)) //[0-9]
 | ||||||
|         { |         { | ||||||
|             string NumStr; |  | ||||||
|             NumStr = LastChar; |             NumStr = LastChar; | ||||||
| 
 | 
 | ||||||
|             while (isdigit(NextChar())) //[0-9]*
 |             while (isdigit(NextChar())) //[0-9]*
 | ||||||
|                 NumStr += LastChar; |                 NumStr += LastChar; | ||||||
| 
 | 
 | ||||||
|             if (sscanf_s(NumStr.c_str(), "%llu", &NumberVal) != 1) |             auto error = ConvertNumber(NumStr.c_str(), NumberVal, 10); | ||||||
|                 return ReportError("sscanf_s failed on decimal number"); |             if (error) | ||||||
|  |                 return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on decimal number", error)); | ||||||
|             return tok_number; |             return tok_number; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -418,7 +448,8 @@ struct Lexer | ||||||
|             if (LastChar == EOF) //unexpected end of file
 |             if (LastChar == EOF) //unexpected end of file
 | ||||||
|                 return ReportError("unexpected end of file in block comment"); |                 return ReportError("unexpected end of file in block comment"); | ||||||
| 
 | 
 | ||||||
|             NextChar(2); |             NextChar(); | ||||||
|  |             NextChar(); | ||||||
|             return GetToken(); //get the next non-comment token
 |             return GetToken(); //get the next non-comment token
 | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -426,19 +457,22 @@ struct Lexer | ||||||
|         auto opFound = OpTripleMap.find(MAKE_OP_TRIPLE(LastChar, PeekChar(), PeekChar(1))); |         auto opFound = OpTripleMap.find(MAKE_OP_TRIPLE(LastChar, PeekChar(), PeekChar(1))); | ||||||
|         if (opFound != OpTripleMap.end()) |         if (opFound != OpTripleMap.end()) | ||||||
|         { |         { | ||||||
|             NextChar(3); |             NextChar(); | ||||||
|  |             NextChar(); | ||||||
|  |             NextChar(); | ||||||
|             return opFound->second; |             return opFound->second; | ||||||
|         } |         } | ||||||
|         opFound = OpDoubleMap.find(MAKE_OP_DOUBLE(LastChar, PeekChar())); |         opFound = OpDoubleMap.find(MAKE_OP_DOUBLE(LastChar, PeekChar())); | ||||||
|         if (opFound != OpDoubleMap.end()) |         if (opFound != OpDoubleMap.end()) | ||||||
|         { |         { | ||||||
|             NextChar(2); |             NextChar(); | ||||||
|  |             NextChar(); | ||||||
|             return opFound->second; |             return opFound->second; | ||||||
|         } |         } | ||||||
|         opFound = OpSingleMap.find(MAKE_OP_SINGLE(LastChar)); |         opFound = OpSingleMap.find(MAKE_OP_SINGLE(LastChar)); | ||||||
|         if (opFound != OpSingleMap.end()) |         if (opFound != OpSingleMap.end()) | ||||||
|         { |         { | ||||||
|             NextChar(1); |             NextChar(); | ||||||
|             return opFound->second; |             return opFound->second; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -447,7 +481,7 @@ struct Lexer | ||||||
|             return tok_eof; |             return tok_eof; | ||||||
| 
 | 
 | ||||||
|         //unknown character
 |         //unknown character
 | ||||||
|         return ReportError(StringUtils::sprintf("unexpected character \"%c\"", LastChar)); |         return ReportError(StringUtils::sprintf("unexpected character \'%c\'", LastChar)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool ReadInputFile(const string & filename) |     bool ReadInputFile(const string & filename) | ||||||
|  | @ -456,107 +490,131 @@ struct Lexer | ||||||
|         return FileHelper::ReadAllData(filename, Input); |         return FileHelper::ReadAllData(filename, Input); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool TestLex(function<void(const string & line)> lexEnum) |     bool TestLex(function<void(const string & line)> lexEnum, bool output = true) | ||||||
|     { |     { | ||||||
|         auto line = 0; |         auto line = 0; | ||||||
|         lexEnum("1: "); |         if (output) | ||||||
|  |             lexEnum("1: "); | ||||||
|         int tok; |         int tok; | ||||||
|  |         string toks; | ||||||
|  |         clearReserve(toks); | ||||||
|  |         char newlineText[128] = ""; | ||||||
|         do |         do | ||||||
|         { |         { | ||||||
|             tok = GetToken(); |             tok = GetToken(); | ||||||
|             string toks; |             if (!output) | ||||||
|  |                 continue; | ||||||
|  |             toks.clear(); | ||||||
|             while (line < CurLine) |             while (line < CurLine) | ||||||
|             { |             { | ||||||
|                 line++; |                 line++; | ||||||
|                 toks += StringUtils::sprintf("\n%d: ", line + 1); |                 sprintf_s(newlineText, "\n%d: ", line + 1); | ||||||
|  |                 toks.append(newlineText); | ||||||
|             } |             } | ||||||
|             lexEnum(toks + TokString(tok) + " "); |             toks.append(TokString(tok)); | ||||||
|  |             appendCh(toks, ' '); | ||||||
|  |             lexEnum(toks); | ||||||
|         } while (tok != tok_eof && tok != tok_error); |         } while (tok != tok_eof && tok != tok_error); | ||||||
|         if (tok != tok_error && tok != tok_eof) |         if (tok != tok_error && tok != tok_eof) | ||||||
|             tok = ReportError("lexer did not finish at the end of the file"); |             tok = ReportError("lexer did not finish at the end of the file"); | ||||||
|         for (const auto & warning : Warnings) |         for (const auto & warning : Warnings) | ||||||
|             lexEnum("\nwarning: "  + warning); |             if (output) | ||||||
|  |                 lexEnum("\nwarning: " + warning); | ||||||
|         return tok != tok_error; |         return tok != tok_error; | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| bool TestLexer(const string & filename) | bool TestLexer(Lexer & lexer, const string & filename) | ||||||
| { | { | ||||||
|     Lexer lexer; |  | ||||||
|     if (!lexer.ReadInputFile("tests\\" + filename)) |     if (!lexer.ReadInputFile("tests\\" + filename)) | ||||||
|     { |     { | ||||||
|         printf("failed to read \"%s\"\n", filename.c_str()); |         printf("failed to read \"%s\"\n", filename.c_str()); | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|     string actual; |     string actual; | ||||||
|     if(!lexer.TestLex([&](const string & line) |     Lexer::clearReserve(actual); | ||||||
|  |     auto success = lexer.TestLex([&](const string & line) | ||||||
|     { |     { | ||||||
|         actual += line; |         actual.append(line); | ||||||
|     })) |     }); | ||||||
|     { |  | ||||||
|         actual += StringUtils::sprintf("lex error in \"%s\": %s\n", filename.c_str(), lexer.Error.c_str()); |  | ||||||
|     } |  | ||||||
|     actual = StringUtils::Trim(actual); |  | ||||||
|     string expected; |     string expected; | ||||||
|     if (!FileHelper::ReadAllText("tests\\expected\\" + filename + ".lextest", expected)) //don't fail tests that we didn't specify yet
 |     if (FileHelper::ReadAllText("tests\\exp_lex\\" + filename, expected)) | ||||||
|         return true; |  | ||||||
|     StringUtils::ReplaceAll(expected, "\r\n", "\n"); |  | ||||||
|     expected = StringUtils::Trim(expected); |  | ||||||
|     if (expected == actual) |  | ||||||
|     { |     { | ||||||
|         printf("lexer test for \"%s\" success!\n", filename.c_str()); |         if (expected == actual) | ||||||
|         return true; |         { | ||||||
|  |             printf("lexer test for \"%s\" success!\n", filename.c_str()); | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|     printf("lexer test for \"%s\" failed\n", filename.c_str()); |     if (success) | ||||||
|  |         return true; | ||||||
|  |     printf("lexer test for \"%s\" failed...\n", filename.c_str()); | ||||||
|     FileHelper::WriteAllText("expected.out", expected); |     FileHelper::WriteAllText("expected.out", expected); | ||||||
|     FileHelper::WriteAllText("actual.out", actual); |     FileHelper::WriteAllText("actual.out", actual); | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RunLexerTests() | bool DebugLexer(Lexer & lexer, const string & filename, bool output) | ||||||
| { | { | ||||||
|     for (auto file : testFiles) |  | ||||||
|         TestLexer(file); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool DebugLexer(const string & filename) |  | ||||||
| { |  | ||||||
|     Lexer lexer; |  | ||||||
|     if (!lexer.ReadInputFile("tests\\" + filename)) |     if (!lexer.ReadInputFile("tests\\" + filename)) | ||||||
|     { |     { | ||||||
|         printf("failed to read \"%s\"\n", filename.c_str()); |         printf("failed to read \"%s\"\n", filename.c_str()); | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|     lexer.TestLex([](const string & line) |     auto success = lexer.TestLex([](const string & line) | ||||||
|     { |     { | ||||||
|         printf("%s", line.c_str()); |         printf("%s", line.c_str()); | ||||||
|     }); |     }, output); | ||||||
|     puts(""); |     if (output) | ||||||
|     return true; |         puts(""); | ||||||
|  |     return success; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GenerateExpected(const string & filename) | void GenerateExpected(Lexer & lexer, const string & filename) | ||||||
| { | { | ||||||
|     Lexer lexer; |  | ||||||
|     if (!lexer.ReadInputFile("tests\\" + filename)) |     if (!lexer.ReadInputFile("tests\\" + filename)) | ||||||
|     { |     { | ||||||
|         printf("failed to read \"%s\"\n", filename.c_str()); |         printf("failed to read \"%s\"\n", filename.c_str()); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     string actual; |     string actual; | ||||||
|     if (!lexer.TestLex([&](const string & line) |     Lexer::clearReserve(actual); | ||||||
|  |     lexer.TestLex([&](const string & line) | ||||||
|     { |     { | ||||||
|         actual += line; |         actual.append(line); | ||||||
|     })) |     }); | ||||||
|     { |     FileHelper::WriteAllText("tests\\exp_lex\\" + filename, actual); | ||||||
|         actual += StringUtils::sprintf("lex error in \"%s\": %s\n", filename.c_str(), lexer.Error.c_str()); | } | ||||||
|     } | 
 | ||||||
|     FileHelper::WriteAllText("tests\\expected\\" + filename + ".lextest", actual); | void GenerateExpectedTests() | ||||||
|  | { | ||||||
|  |     Lexer lexer; | ||||||
|  |     for (auto file : testFiles) | ||||||
|  |         GenerateExpected(lexer, file); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void RunLexerTests() | ||||||
|  | { | ||||||
|  |     Lexer lexer; | ||||||
|  |     for (auto file : testFiles) | ||||||
|  |         TestLexer(lexer, file); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void DebugLexerTests(bool output = true) | ||||||
|  | { | ||||||
|  |     Lexer lexer; | ||||||
|  |     for (auto file : testFiles) | ||||||
|  |         DebugLexer(lexer, file, output); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int main() | int main() | ||||||
| { | { | ||||||
|     RunLexerTests(); |     //GenerateExpectedTests();
 | ||||||
|  |     auto ticks = GetTickCount(); | ||||||
|  |     //Lexer lexer;
 | ||||||
|  |     //DebugLexer(lexer, "AndroidManifestTemplate.bt", false);
 | ||||||
|  |     DebugLexerTests(false); | ||||||
|  |     printf("finished in %ums\n", GetTickCount() - ticks); | ||||||
|     system("pause"); |     system("pause"); | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
		Loading…
	
		Reference in New Issue