mirror of https://github.com/x64dbg/btparser
performance optimizations
This commit is contained in:
parent
2206f556c3
commit
a05ba4e8b7
|
@ -14,12 +14,9 @@ bool FileHelper::ReadAllData(const String & fileName, std::vector<unsigned char>
|
||||||
content.clear();
|
content.clear();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
Memory<char*> filedata(filesize + 1, "FileReader::ReadAllData:filedata");
|
content.resize(filesize);
|
||||||
DWORD read = 0;
|
DWORD read = 0;
|
||||||
if(!ReadFile(hFile, filedata(), filesize, &read, nullptr))
|
return !!ReadFile(hFile, content.data(), filesize, &read, nullptr);
|
||||||
return false;
|
|
||||||
content = std::vector<unsigned char>(filedata(), filedata() + filesize);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FileHelper::WriteAllData(const String & fileName, const void* data, size_t size)
|
bool FileHelper::WriteAllData(const String & fileName, const void* data, size_t size)
|
||||||
|
|
206
cparser/main.cpp
206
cparser/main.cpp
|
@ -14,6 +14,8 @@
|
||||||
#define MAKE_OP_DOUBLE(ch1, ch2) (ch2 << 8 | ch1)
|
#define MAKE_OP_DOUBLE(ch1, ch2) (ch2 << 8 | ch1)
|
||||||
#define MAKE_OP_SINGLE(ch1) (ch1)
|
#define MAKE_OP_SINGLE(ch1) (ch1)
|
||||||
|
|
||||||
|
#define DEFAULT_STRING_BUFFER 65536
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
struct Lexer
|
struct Lexer
|
||||||
|
@ -51,7 +53,6 @@ struct Lexer
|
||||||
};
|
};
|
||||||
|
|
||||||
vector<uint8_t> Input;
|
vector<uint8_t> Input;
|
||||||
string ConsumedInput;
|
|
||||||
size_t Index = 0;
|
size_t Index = 0;
|
||||||
string Error;
|
string Error;
|
||||||
vector<String> Warnings;
|
vector<String> Warnings;
|
||||||
|
@ -60,19 +61,34 @@ struct Lexer
|
||||||
string IdentifierStr;
|
string IdentifierStr;
|
||||||
uint64_t NumberVal = 0;
|
uint64_t NumberVal = 0;
|
||||||
string StringLit;
|
string StringLit;
|
||||||
|
string NumStr;
|
||||||
char CharLit = '\0';
|
char CharLit = '\0';
|
||||||
int LastChar = ' ';
|
int LastChar = ' ';
|
||||||
int CurLine = 0;
|
int CurLine = 0;
|
||||||
|
|
||||||
|
static void clearReserve(string & str, size_t reserve = DEFAULT_STRING_BUFFER)
|
||||||
|
{
|
||||||
|
str.clear();
|
||||||
|
str.reserve(reserve);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void appendCh(string & str, char ch)
|
||||||
|
{
|
||||||
|
str.resize(str.size() + 1);
|
||||||
|
str[str.size() - 1] = ch;
|
||||||
|
}
|
||||||
|
|
||||||
void ResetLexerState()
|
void ResetLexerState()
|
||||||
{
|
{
|
||||||
Input.clear();
|
Input.clear();
|
||||||
ConsumedInput.clear();
|
Input.reserve(1024 * 1024);
|
||||||
Index = 0;
|
Index = 0;
|
||||||
Error.clear();
|
Error.clear();
|
||||||
IdentifierStr.clear();
|
Warnings.clear();
|
||||||
|
clearReserve(IdentifierStr);
|
||||||
NumberVal = 0;
|
NumberVal = 0;
|
||||||
StringLit.clear();
|
clearReserve(StringLit);
|
||||||
|
clearReserve(NumStr, 16);
|
||||||
CharLit = '\0';
|
CharLit = '\0';
|
||||||
LastChar = ' ';
|
LastChar = ' ';
|
||||||
CurLine = 0;
|
CurLine = 0;
|
||||||
|
@ -173,7 +189,6 @@ struct Lexer
|
||||||
ReportWarning(StringUtils::sprintf("\\0 character in file data"));
|
ReportWarning(StringUtils::sprintf("\\0 character in file data"));
|
||||||
return ReadChar();
|
return ReadChar();
|
||||||
}
|
}
|
||||||
ConsumedInput += ch;
|
|
||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,11 +206,23 @@ struct Lexer
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int NextChar(int count = 1)
|
int NextChar()
|
||||||
{
|
{
|
||||||
for (auto i = 0; i < count; i++)
|
return LastChar = ReadChar();
|
||||||
LastChar = ReadChar();
|
}
|
||||||
return LastChar;
|
|
||||||
|
static const char* ConvertNumber(const char* str, uint64_t & result, int radix)
|
||||||
|
{
|
||||||
|
errno = 0;
|
||||||
|
char* end;
|
||||||
|
result = strtoull(str, &end, radix);
|
||||||
|
if (!result && end == str)
|
||||||
|
return "not a number";
|
||||||
|
if (result == ULLONG_MAX && errno)
|
||||||
|
return "does not fit";
|
||||||
|
if (*end)
|
||||||
|
return "str not completely consumed";
|
||||||
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
int GetToken()
|
int GetToken()
|
||||||
|
@ -265,9 +292,10 @@ struct Lexer
|
||||||
char byteStr[3] = "";
|
char byteStr[3] = "";
|
||||||
byteStr[0] = ch1;
|
byteStr[0] = ch1;
|
||||||
byteStr[1] = ch2;
|
byteStr[1] = ch2;
|
||||||
unsigned int hexData;
|
uint64_t hexData;
|
||||||
if (sscanf_s(byteStr, "%X", &hexData) != 1)
|
auto error = ConvertNumber(byteStr, hexData, 16);
|
||||||
return ReportError(StringUtils::sprintf("sscanf_s failed for hex sequence \"\\x%c%c\" in character literal", ch1, ch2));
|
if (error)
|
||||||
|
return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in character literal", error, ch1, ch2));
|
||||||
LastChar = hexData & 0xFF;
|
LastChar = hexData & 0xFF;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -330,9 +358,10 @@ struct Lexer
|
||||||
char byteStr[3] = "";
|
char byteStr[3] = "";
|
||||||
byteStr[0] = ch1;
|
byteStr[0] = ch1;
|
||||||
byteStr[1] = ch2;
|
byteStr[1] = ch2;
|
||||||
unsigned int hexData;
|
uint64_t hexData;
|
||||||
if (sscanf_s(byteStr, "%X", &hexData) != 1)
|
auto error = ConvertNumber(byteStr, hexData, 16);
|
||||||
return ReportError(StringUtils::sprintf("sscanf_s failed for hex sequence \"\\x%c%c\" in string literal", ch1, ch2));
|
if (error)
|
||||||
|
return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in string literal", error, ch1, ch2));
|
||||||
LastChar = hexData & 0xFF;
|
LastChar = hexData & 0xFF;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -341,7 +370,7 @@ struct Lexer
|
||||||
else
|
else
|
||||||
return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar));
|
return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar));
|
||||||
}
|
}
|
||||||
StringLit.push_back(LastChar);
|
appendCh(StringLit, LastChar);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -352,7 +381,7 @@ struct Lexer
|
||||||
NextChar();
|
NextChar();
|
||||||
while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_]
|
while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_]
|
||||||
{
|
{
|
||||||
IdentifierStr += LastChar;
|
appendCh(IdentifierStr, LastChar);
|
||||||
NextChar();
|
NextChar();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -367,29 +396,30 @@ struct Lexer
|
||||||
//hex numbers
|
//hex numbers
|
||||||
if (LastChar == '0' && PeekChar() == 'x') //0x
|
if (LastChar == '0' && PeekChar() == 'x') //0x
|
||||||
{
|
{
|
||||||
string NumStr;
|
|
||||||
ReadChar(); //consume the 'x'
|
ReadChar(); //consume the 'x'
|
||||||
|
NumStr.clear();
|
||||||
|
|
||||||
while (isxdigit(NextChar())) //[0-9a-fA-F]*
|
while (isxdigit(NextChar())) //[0-9a-fA-F]*
|
||||||
NumStr += LastChar;
|
appendCh(NumStr, LastChar);
|
||||||
|
|
||||||
if (!NumStr.length()) //check for error condition
|
if (!NumStr.length()) //check for error condition
|
||||||
return ReportError("no hex digits after \"0x\" prefix");
|
return ReportError("no hex digits after \"0x\" prefix");
|
||||||
|
|
||||||
if (sscanf_s(NumStr.c_str(), "%llX", &NumberVal) != 1)
|
auto error = ConvertNumber(NumStr.c_str(), NumberVal, 16);
|
||||||
return ReportError("sscanf_s failed on hexadecimal number");
|
if (error)
|
||||||
|
return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on hexadecimal number", error));
|
||||||
return tok_number;
|
return tok_number;
|
||||||
}
|
}
|
||||||
if (isdigit(LastChar)) //[0-9]
|
if (isdigit(LastChar)) //[0-9]
|
||||||
{
|
{
|
||||||
string NumStr;
|
|
||||||
NumStr = LastChar;
|
NumStr = LastChar;
|
||||||
|
|
||||||
while (isdigit(NextChar())) //[0-9]*
|
while (isdigit(NextChar())) //[0-9]*
|
||||||
NumStr += LastChar;
|
NumStr += LastChar;
|
||||||
|
|
||||||
if (sscanf_s(NumStr.c_str(), "%llu", &NumberVal) != 1)
|
auto error = ConvertNumber(NumStr.c_str(), NumberVal, 10);
|
||||||
return ReportError("sscanf_s failed on decimal number");
|
if (error)
|
||||||
|
return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on decimal number", error));
|
||||||
return tok_number;
|
return tok_number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -418,7 +448,8 @@ struct Lexer
|
||||||
if (LastChar == EOF) //unexpected end of file
|
if (LastChar == EOF) //unexpected end of file
|
||||||
return ReportError("unexpected end of file in block comment");
|
return ReportError("unexpected end of file in block comment");
|
||||||
|
|
||||||
NextChar(2);
|
NextChar();
|
||||||
|
NextChar();
|
||||||
return GetToken(); //get the next non-comment token
|
return GetToken(); //get the next non-comment token
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -426,19 +457,22 @@ struct Lexer
|
||||||
auto opFound = OpTripleMap.find(MAKE_OP_TRIPLE(LastChar, PeekChar(), PeekChar(1)));
|
auto opFound = OpTripleMap.find(MAKE_OP_TRIPLE(LastChar, PeekChar(), PeekChar(1)));
|
||||||
if (opFound != OpTripleMap.end())
|
if (opFound != OpTripleMap.end())
|
||||||
{
|
{
|
||||||
NextChar(3);
|
NextChar();
|
||||||
|
NextChar();
|
||||||
|
NextChar();
|
||||||
return opFound->second;
|
return opFound->second;
|
||||||
}
|
}
|
||||||
opFound = OpDoubleMap.find(MAKE_OP_DOUBLE(LastChar, PeekChar()));
|
opFound = OpDoubleMap.find(MAKE_OP_DOUBLE(LastChar, PeekChar()));
|
||||||
if (opFound != OpDoubleMap.end())
|
if (opFound != OpDoubleMap.end())
|
||||||
{
|
{
|
||||||
NextChar(2);
|
NextChar();
|
||||||
|
NextChar();
|
||||||
return opFound->second;
|
return opFound->second;
|
||||||
}
|
}
|
||||||
opFound = OpSingleMap.find(MAKE_OP_SINGLE(LastChar));
|
opFound = OpSingleMap.find(MAKE_OP_SINGLE(LastChar));
|
||||||
if (opFound != OpSingleMap.end())
|
if (opFound != OpSingleMap.end())
|
||||||
{
|
{
|
||||||
NextChar(1);
|
NextChar();
|
||||||
return opFound->second;
|
return opFound->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -447,7 +481,7 @@ struct Lexer
|
||||||
return tok_eof;
|
return tok_eof;
|
||||||
|
|
||||||
//unknown character
|
//unknown character
|
||||||
return ReportError(StringUtils::sprintf("unexpected character \"%c\"", LastChar));
|
return ReportError(StringUtils::sprintf("unexpected character \'%c\'", LastChar));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ReadInputFile(const string & filename)
|
bool ReadInputFile(const string & filename)
|
||||||
|
@ -456,107 +490,131 @@ struct Lexer
|
||||||
return FileHelper::ReadAllData(filename, Input);
|
return FileHelper::ReadAllData(filename, Input);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TestLex(function<void(const string & line)> lexEnum)
|
bool TestLex(function<void(const string & line)> lexEnum, bool output = true)
|
||||||
{
|
{
|
||||||
auto line = 0;
|
auto line = 0;
|
||||||
lexEnum("1: ");
|
if (output)
|
||||||
|
lexEnum("1: ");
|
||||||
int tok;
|
int tok;
|
||||||
|
string toks;
|
||||||
|
clearReserve(toks);
|
||||||
|
char newlineText[128] = "";
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
tok = GetToken();
|
tok = GetToken();
|
||||||
string toks;
|
if (!output)
|
||||||
|
continue;
|
||||||
|
toks.clear();
|
||||||
while (line < CurLine)
|
while (line < CurLine)
|
||||||
{
|
{
|
||||||
line++;
|
line++;
|
||||||
toks += StringUtils::sprintf("\n%d: ", line + 1);
|
sprintf_s(newlineText, "\n%d: ", line + 1);
|
||||||
|
toks.append(newlineText);
|
||||||
}
|
}
|
||||||
lexEnum(toks + TokString(tok) + " ");
|
toks.append(TokString(tok));
|
||||||
|
appendCh(toks, ' ');
|
||||||
|
lexEnum(toks);
|
||||||
} while (tok != tok_eof && tok != tok_error);
|
} while (tok != tok_eof && tok != tok_error);
|
||||||
if (tok != tok_error && tok != tok_eof)
|
if (tok != tok_error && tok != tok_eof)
|
||||||
tok = ReportError("lexer did not finish at the end of the file");
|
tok = ReportError("lexer did not finish at the end of the file");
|
||||||
for (const auto & warning : Warnings)
|
for (const auto & warning : Warnings)
|
||||||
lexEnum("\nwarning: " + warning);
|
if (output)
|
||||||
|
lexEnum("\nwarning: " + warning);
|
||||||
return tok != tok_error;
|
return tok != tok_error;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
bool TestLexer(const string & filename)
|
bool TestLexer(Lexer & lexer, const string & filename)
|
||||||
{
|
{
|
||||||
Lexer lexer;
|
|
||||||
if (!lexer.ReadInputFile("tests\\" + filename))
|
if (!lexer.ReadInputFile("tests\\" + filename))
|
||||||
{
|
{
|
||||||
printf("failed to read \"%s\"\n", filename.c_str());
|
printf("failed to read \"%s\"\n", filename.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
string actual;
|
string actual;
|
||||||
if(!lexer.TestLex([&](const string & line)
|
Lexer::clearReserve(actual);
|
||||||
|
auto success = lexer.TestLex([&](const string & line)
|
||||||
{
|
{
|
||||||
actual += line;
|
actual.append(line);
|
||||||
}))
|
});
|
||||||
{
|
|
||||||
actual += StringUtils::sprintf("lex error in \"%s\": %s\n", filename.c_str(), lexer.Error.c_str());
|
|
||||||
}
|
|
||||||
actual = StringUtils::Trim(actual);
|
|
||||||
string expected;
|
string expected;
|
||||||
if (!FileHelper::ReadAllText("tests\\expected\\" + filename + ".lextest", expected)) //don't fail tests that we didn't specify yet
|
if (FileHelper::ReadAllText("tests\\exp_lex\\" + filename, expected))
|
||||||
return true;
|
|
||||||
StringUtils::ReplaceAll(expected, "\r\n", "\n");
|
|
||||||
expected = StringUtils::Trim(expected);
|
|
||||||
if (expected == actual)
|
|
||||||
{
|
{
|
||||||
printf("lexer test for \"%s\" success!\n", filename.c_str());
|
if (expected == actual)
|
||||||
return true;
|
{
|
||||||
|
printf("lexer test for \"%s\" success!\n", filename.c_str());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
printf("lexer test for \"%s\" failed\n", filename.c_str());
|
if (success)
|
||||||
|
return true;
|
||||||
|
printf("lexer test for \"%s\" failed...\n", filename.c_str());
|
||||||
FileHelper::WriteAllText("expected.out", expected);
|
FileHelper::WriteAllText("expected.out", expected);
|
||||||
FileHelper::WriteAllText("actual.out", actual);
|
FileHelper::WriteAllText("actual.out", actual);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RunLexerTests()
|
bool DebugLexer(Lexer & lexer, const string & filename, bool output)
|
||||||
{
|
{
|
||||||
for (auto file : testFiles)
|
|
||||||
TestLexer(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DebugLexer(const string & filename)
|
|
||||||
{
|
|
||||||
Lexer lexer;
|
|
||||||
if (!lexer.ReadInputFile("tests\\" + filename))
|
if (!lexer.ReadInputFile("tests\\" + filename))
|
||||||
{
|
{
|
||||||
printf("failed to read \"%s\"\n", filename.c_str());
|
printf("failed to read \"%s\"\n", filename.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
lexer.TestLex([](const string & line)
|
auto success = lexer.TestLex([](const string & line)
|
||||||
{
|
{
|
||||||
printf("%s", line.c_str());
|
printf("%s", line.c_str());
|
||||||
});
|
}, output);
|
||||||
puts("");
|
if (output)
|
||||||
return true;
|
puts("");
|
||||||
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GenerateExpected(const string & filename)
|
void GenerateExpected(Lexer & lexer, const string & filename)
|
||||||
{
|
{
|
||||||
Lexer lexer;
|
|
||||||
if (!lexer.ReadInputFile("tests\\" + filename))
|
if (!lexer.ReadInputFile("tests\\" + filename))
|
||||||
{
|
{
|
||||||
printf("failed to read \"%s\"\n", filename.c_str());
|
printf("failed to read \"%s\"\n", filename.c_str());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
string actual;
|
string actual;
|
||||||
if (!lexer.TestLex([&](const string & line)
|
Lexer::clearReserve(actual);
|
||||||
|
lexer.TestLex([&](const string & line)
|
||||||
{
|
{
|
||||||
actual += line;
|
actual.append(line);
|
||||||
}))
|
});
|
||||||
{
|
FileHelper::WriteAllText("tests\\exp_lex\\" + filename, actual);
|
||||||
actual += StringUtils::sprintf("lex error in \"%s\": %s\n", filename.c_str(), lexer.Error.c_str());
|
}
|
||||||
}
|
|
||||||
FileHelper::WriteAllText("tests\\expected\\" + filename + ".lextest", actual);
|
void GenerateExpectedTests()
|
||||||
|
{
|
||||||
|
Lexer lexer;
|
||||||
|
for (auto file : testFiles)
|
||||||
|
GenerateExpected(lexer, file);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RunLexerTests()
|
||||||
|
{
|
||||||
|
Lexer lexer;
|
||||||
|
for (auto file : testFiles)
|
||||||
|
TestLexer(lexer, file);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugLexerTests(bool output = true)
|
||||||
|
{
|
||||||
|
Lexer lexer;
|
||||||
|
for (auto file : testFiles)
|
||||||
|
DebugLexer(lexer, file, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
RunLexerTests();
|
//GenerateExpectedTests();
|
||||||
|
auto ticks = GetTickCount();
|
||||||
|
//Lexer lexer;
|
||||||
|
//DebugLexer(lexer, "AndroidManifestTemplate.bt", false);
|
||||||
|
DebugLexerTests(false);
|
||||||
|
printf("finished in %ums\n", GetTickCount() - ticks);
|
||||||
system("pause");
|
system("pause");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
Loading…
Reference in New Issue