performance optimizations

This commit is contained in:
mrexodia 2016-06-05 14:55:46 +02:00
parent 2206f556c3
commit a05ba4e8b7
No known key found for this signature in database
GPG Key ID: D72F9A4FAA0073B4
2 changed files with 134 additions and 79 deletions

View File

@ -14,12 +14,9 @@ bool FileHelper::ReadAllData(const String & fileName, std::vector<unsigned char>
content.clear(); content.clear();
return true; return true;
} }
Memory<char*> filedata(filesize + 1, "FileReader::ReadAllData:filedata"); content.resize(filesize);
DWORD read = 0; DWORD read = 0;
if(!ReadFile(hFile, filedata(), filesize, &read, nullptr)) return !!ReadFile(hFile, content.data(), filesize, &read, nullptr);
return false;
content = std::vector<unsigned char>(filedata(), filedata() + filesize);
return true;
} }
bool FileHelper::WriteAllData(const String & fileName, const void* data, size_t size) bool FileHelper::WriteAllData(const String & fileName, const void* data, size_t size)

View File

@ -14,6 +14,8 @@
#define MAKE_OP_DOUBLE(ch1, ch2) (ch2 << 8 | ch1) #define MAKE_OP_DOUBLE(ch1, ch2) (ch2 << 8 | ch1)
#define MAKE_OP_SINGLE(ch1) (ch1) #define MAKE_OP_SINGLE(ch1) (ch1)
#define DEFAULT_STRING_BUFFER 65536
using namespace std; using namespace std;
struct Lexer struct Lexer
@ -51,7 +53,6 @@ struct Lexer
}; };
vector<uint8_t> Input; vector<uint8_t> Input;
string ConsumedInput;
size_t Index = 0; size_t Index = 0;
string Error; string Error;
vector<String> Warnings; vector<String> Warnings;
@ -60,19 +61,34 @@ struct Lexer
string IdentifierStr; string IdentifierStr;
uint64_t NumberVal = 0; uint64_t NumberVal = 0;
string StringLit; string StringLit;
string NumStr;
char CharLit = '\0'; char CharLit = '\0';
int LastChar = ' '; int LastChar = ' ';
int CurLine = 0; int CurLine = 0;
//Empties `str` and then requests capacity for at least `reserve` characters.
//`reserve` defaults to DEFAULT_STRING_BUFFER; pass a smaller value for short-lived buffers.
static void clearReserve(string & str, size_t reserve = DEFAULT_STRING_BUFFER)
{
str.clear();
str.reserve(reserve); //capacity request only, the string stays empty
}
//Appends the single character `ch` to the end of `str`.
//The string is grown by one element and the new last slot is then overwritten.
//NOTE(review): presumably preferred over push_back for speed in this commit - confirm by profiling.
static void appendCh(string & str, char ch)
{
    const size_t last = str.size(); //index the new character will occupy
    str.resize(last + 1);
    str[last] = ch;
}
void ResetLexerState() void ResetLexerState()
{ {
Input.clear(); Input.clear();
ConsumedInput.clear(); Input.reserve(1024 * 1024);
Index = 0; Index = 0;
Error.clear(); Error.clear();
IdentifierStr.clear(); Warnings.clear();
clearReserve(IdentifierStr);
NumberVal = 0; NumberVal = 0;
StringLit.clear(); clearReserve(StringLit);
clearReserve(NumStr, 16);
CharLit = '\0'; CharLit = '\0';
LastChar = ' '; LastChar = ' ';
CurLine = 0; CurLine = 0;
@ -173,7 +189,6 @@ struct Lexer
ReportWarning(StringUtils::sprintf("\\0 character in file data")); ReportWarning(StringUtils::sprintf("\\0 character in file data"));
return ReadChar(); return ReadChar();
} }
ConsumedInput += ch;
return ch; return ch;
} }
@ -191,11 +206,23 @@ struct Lexer
return true; return true;
} }
int NextChar(int count = 1) int NextChar()
{ {
for (auto i = 0; i < count; i++) return LastChar = ReadChar();
LastChar = ReadChar(); }
return LastChar;
//Parses the whole of `str` as an unsigned integer in base `radix` and stores it in `result`.
//Returns nullptr on success, otherwise a short static description of the failure:
//  "not a number"                - no digits could be parsed (`result` is set to 0)
//  "does not fit"                - the value overflows unsigned long long
//  "str not completely consumed" - characters remain after the parsed number
static const char* ConvertNumber(const char* str, uint64_t & result, int radix)
{
    //strtoull skips leading whitespace and accepts a sign, so input such as "-1"
    //would silently wrap to a huge value; only accept text starting with a digit/letter.
    if (!isalnum(static_cast<unsigned char>(*str)))
    {
        result = 0;
        return "not a number";
    }
    errno = 0; //clear any stale error so the ERANGE check below is reliable
    char* end = nullptr;
    result = strtoull(str, &end, radix);
    if (end == str) //nothing was converted (strtoull returned 0)
        return "not a number";
    if (errno == ERANGE) //overflow, strtoull returned ULLONG_MAX
        return "does not fit";
    if (*end)
        return "str not completely consumed";
    return nullptr;
}
int GetToken() int GetToken()
@ -265,9 +292,10 @@ struct Lexer
char byteStr[3] = ""; char byteStr[3] = "";
byteStr[0] = ch1; byteStr[0] = ch1;
byteStr[1] = ch2; byteStr[1] = ch2;
unsigned int hexData; uint64_t hexData;
if (sscanf_s(byteStr, "%X", &hexData) != 1) auto error = ConvertNumber(byteStr, hexData, 16);
return ReportError(StringUtils::sprintf("sscanf_s failed for hex sequence \"\\x%c%c\" in character literal", ch1, ch2)); if (error)
return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in character literal", error, ch1, ch2));
LastChar = hexData & 0xFF; LastChar = hexData & 0xFF;
} }
else else
@ -330,9 +358,10 @@ struct Lexer
char byteStr[3] = ""; char byteStr[3] = "";
byteStr[0] = ch1; byteStr[0] = ch1;
byteStr[1] = ch2; byteStr[1] = ch2;
unsigned int hexData; uint64_t hexData;
if (sscanf_s(byteStr, "%X", &hexData) != 1) auto error = ConvertNumber(byteStr, hexData, 16);
return ReportError(StringUtils::sprintf("sscanf_s failed for hex sequence \"\\x%c%c\" in string literal", ch1, ch2)); if (error)
return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in string literal", error, ch1, ch2));
LastChar = hexData & 0xFF; LastChar = hexData & 0xFF;
} }
else else
@ -341,7 +370,7 @@ struct Lexer
else else
return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar)); return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar));
} }
StringLit.push_back(LastChar); appendCh(StringLit, LastChar);
} }
} }
@ -352,7 +381,7 @@ struct Lexer
NextChar(); NextChar();
while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_] while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_]
{ {
IdentifierStr += LastChar; appendCh(IdentifierStr, LastChar);
NextChar(); NextChar();
} }
@ -367,29 +396,30 @@ struct Lexer
//hex numbers //hex numbers
if (LastChar == '0' && PeekChar() == 'x') //0x if (LastChar == '0' && PeekChar() == 'x') //0x
{ {
string NumStr;
ReadChar(); //consume the 'x' ReadChar(); //consume the 'x'
NumStr.clear();
while (isxdigit(NextChar())) //[0-9a-fA-F]* while (isxdigit(NextChar())) //[0-9a-fA-F]*
NumStr += LastChar; appendCh(NumStr, LastChar);
if (!NumStr.length()) //check for error condition if (!NumStr.length()) //check for error condition
return ReportError("no hex digits after \"0x\" prefix"); return ReportError("no hex digits after \"0x\" prefix");
if (sscanf_s(NumStr.c_str(), "%llX", &NumberVal) != 1) auto error = ConvertNumber(NumStr.c_str(), NumberVal, 16);
return ReportError("sscanf_s failed on hexadecimal number"); if (error)
return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on hexadecimal number", error));
return tok_number; return tok_number;
} }
if (isdigit(LastChar)) //[0-9] if (isdigit(LastChar)) //[0-9]
{ {
string NumStr;
NumStr = LastChar; NumStr = LastChar;
while (isdigit(NextChar())) //[0-9]* while (isdigit(NextChar())) //[0-9]*
NumStr += LastChar; NumStr += LastChar;
if (sscanf_s(NumStr.c_str(), "%llu", &NumberVal) != 1) auto error = ConvertNumber(NumStr.c_str(), NumberVal, 10);
return ReportError("sscanf_s failed on decimal number"); if (error)
return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on decimal number", error));
return tok_number; return tok_number;
} }
@ -418,7 +448,8 @@ struct Lexer
if (LastChar == EOF) //unexpected end of file if (LastChar == EOF) //unexpected end of file
return ReportError("unexpected end of file in block comment"); return ReportError("unexpected end of file in block comment");
NextChar(2); NextChar();
NextChar();
return GetToken(); //get the next non-comment token return GetToken(); //get the next non-comment token
} }
@ -426,19 +457,22 @@ struct Lexer
auto opFound = OpTripleMap.find(MAKE_OP_TRIPLE(LastChar, PeekChar(), PeekChar(1))); auto opFound = OpTripleMap.find(MAKE_OP_TRIPLE(LastChar, PeekChar(), PeekChar(1)));
if (opFound != OpTripleMap.end()) if (opFound != OpTripleMap.end())
{ {
NextChar(3); NextChar();
NextChar();
NextChar();
return opFound->second; return opFound->second;
} }
opFound = OpDoubleMap.find(MAKE_OP_DOUBLE(LastChar, PeekChar())); opFound = OpDoubleMap.find(MAKE_OP_DOUBLE(LastChar, PeekChar()));
if (opFound != OpDoubleMap.end()) if (opFound != OpDoubleMap.end())
{ {
NextChar(2); NextChar();
NextChar();
return opFound->second; return opFound->second;
} }
opFound = OpSingleMap.find(MAKE_OP_SINGLE(LastChar)); opFound = OpSingleMap.find(MAKE_OP_SINGLE(LastChar));
if (opFound != OpSingleMap.end()) if (opFound != OpSingleMap.end())
{ {
NextChar(1); NextChar();
return opFound->second; return opFound->second;
} }
@ -447,7 +481,7 @@ struct Lexer
return tok_eof; return tok_eof;
//unknown character //unknown character
return ReportError(StringUtils::sprintf("unexpected character \"%c\"", LastChar)); return ReportError(StringUtils::sprintf("unexpected character \'%c\'", LastChar));
} }
bool ReadInputFile(const string & filename) bool ReadInputFile(const string & filename)
@ -456,107 +490,131 @@ struct Lexer
return FileHelper::ReadAllData(filename, Input); return FileHelper::ReadAllData(filename, Input);
} }
bool TestLex(function<void(const string & line)> lexEnum) bool TestLex(function<void(const string & line)> lexEnum, bool output = true)
{ {
auto line = 0; auto line = 0;
if (output)
lexEnum("1: "); lexEnum("1: ");
int tok; int tok;
string toks;
clearReserve(toks);
char newlineText[128] = "";
do do
{ {
tok = GetToken(); tok = GetToken();
string toks; if (!output)
continue;
toks.clear();
while (line < CurLine) while (line < CurLine)
{ {
line++; line++;
toks += StringUtils::sprintf("\n%d: ", line + 1); sprintf_s(newlineText, "\n%d: ", line + 1);
toks.append(newlineText);
} }
lexEnum(toks + TokString(tok) + " "); toks.append(TokString(tok));
appendCh(toks, ' ');
lexEnum(toks);
} while (tok != tok_eof && tok != tok_error); } while (tok != tok_eof && tok != tok_error);
if (tok != tok_error && tok != tok_eof) if (tok != tok_error && tok != tok_eof)
tok = ReportError("lexer did not finish at the end of the file"); tok = ReportError("lexer did not finish at the end of the file");
for (const auto & warning : Warnings) for (const auto & warning : Warnings)
if (output)
lexEnum("\nwarning: " + warning); lexEnum("\nwarning: " + warning);
return tok != tok_error; return tok != tok_error;
} }
}; };
bool TestLexer(const string & filename) bool TestLexer(Lexer & lexer, const string & filename)
{ {
Lexer lexer;
if (!lexer.ReadInputFile("tests\\" + filename)) if (!lexer.ReadInputFile("tests\\" + filename))
{ {
printf("failed to read \"%s\"\n", filename.c_str()); printf("failed to read \"%s\"\n", filename.c_str());
return false; return false;
} }
string actual; string actual;
if(!lexer.TestLex([&](const string & line) Lexer::clearReserve(actual);
auto success = lexer.TestLex([&](const string & line)
{ {
actual += line; actual.append(line);
})) });
{
actual += StringUtils::sprintf("lex error in \"%s\": %s\n", filename.c_str(), lexer.Error.c_str());
}
actual = StringUtils::Trim(actual);
string expected; string expected;
if (!FileHelper::ReadAllText("tests\\expected\\" + filename + ".lextest", expected)) //don't fail tests that we didn't specify yet if (FileHelper::ReadAllText("tests\\exp_lex\\" + filename, expected))
return true; {
StringUtils::ReplaceAll(expected, "\r\n", "\n");
expected = StringUtils::Trim(expected);
if (expected == actual) if (expected == actual)
{ {
printf("lexer test for \"%s\" success!\n", filename.c_str()); printf("lexer test for \"%s\" success!\n", filename.c_str());
return true; return true;
} }
printf("lexer test for \"%s\" failed\n", filename.c_str()); }
if (success)
return true;
printf("lexer test for \"%s\" failed...\n", filename.c_str());
FileHelper::WriteAllText("expected.out", expected); FileHelper::WriteAllText("expected.out", expected);
FileHelper::WriteAllText("actual.out", actual); FileHelper::WriteAllText("actual.out", actual);
return false; return false;
} }
void RunLexerTests() bool DebugLexer(Lexer & lexer, const string & filename, bool output)
{ {
for (auto file : testFiles)
TestLexer(file);
}
bool DebugLexer(const string & filename)
{
Lexer lexer;
if (!lexer.ReadInputFile("tests\\" + filename)) if (!lexer.ReadInputFile("tests\\" + filename))
{ {
printf("failed to read \"%s\"\n", filename.c_str()); printf("failed to read \"%s\"\n", filename.c_str());
return false; return false;
} }
lexer.TestLex([](const string & line) auto success = lexer.TestLex([](const string & line)
{ {
printf("%s", line.c_str()); printf("%s", line.c_str());
}); }, output);
if (output)
puts(""); puts("");
return true; return success;
} }
void GenerateExpected(const string & filename) void GenerateExpected(Lexer & lexer, const string & filename)
{ {
Lexer lexer;
if (!lexer.ReadInputFile("tests\\" + filename)) if (!lexer.ReadInputFile("tests\\" + filename))
{ {
printf("failed to read \"%s\"\n", filename.c_str()); printf("failed to read \"%s\"\n", filename.c_str());
return; return;
} }
string actual; string actual;
if (!lexer.TestLex([&](const string & line) Lexer::clearReserve(actual);
lexer.TestLex([&](const string & line)
{ {
actual += line; actual.append(line);
})) });
{ FileHelper::WriteAllText("tests\\exp_lex\\" + filename, actual);
actual += StringUtils::sprintf("lex error in \"%s\": %s\n", filename.c_str(), lexer.Error.c_str());
} }
FileHelper::WriteAllText("tests\\expected\\" + filename + ".lextest", actual);
//Calls GenerateExpected for every entry in testFiles, reusing one Lexer instance.
//NOTE(review): reuse relies on the lexer state being reset per file - the reset call is not visible here, confirm.
void GenerateExpectedTests()
{
    Lexer lexer;
    for (const auto & file : testFiles) //bind by reference, no per-iteration copy of the entry
        GenerateExpected(lexer, file);
}
//Calls TestLexer for every entry in testFiles, reusing one Lexer instance.
//The per-file result of TestLexer is ignored here.
void RunLexerTests()
{
    Lexer lexer;
    for (const auto & file : testFiles) //bind by reference, no per-iteration copy of the entry
        TestLexer(lexer, file);
}
//Calls DebugLexer for every entry in testFiles, reusing one Lexer instance.
//`output` is forwarded unchanged to DebugLexer; the per-file result is ignored here.
void DebugLexerTests(bool output = true)
{
    Lexer lexer;
    for (const auto & file : testFiles) //bind by reference, no per-iteration copy of the entry
        DebugLexer(lexer, file, output);
}
int main() int main()
{ {
RunLexerTests(); //GenerateExpectedTests();
auto ticks = GetTickCount();
//Lexer lexer;
//DebugLexer(lexer, "AndroidManifestTemplate.bt", false);
DebugLexerTests(false);
printf("finished in %ums\n", GetTickCount() - ticks);
system("pause"); system("pause");
return 0; return 0;
} }