mirror of https://github.com/x64dbg/btparser
support for more keywords
This commit is contained in:
parent
7c18c0238f
commit
306b819db3
|
@ -16,7 +16,11 @@
|
|||
<ClCompile Include="stringutils.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="dynamicmem.h" />
|
||||
<ClInclude Include="filehelper.h" />
|
||||
<ClInclude Include="handle.h" />
|
||||
<ClInclude Include="stringutils.h" />
|
||||
<ClInclude Include="testfiles.h" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{B0411C78-2F06-49E0-8DE9-5C52A466F5DE}</ProjectGuid>
|
||||
|
|
|
@ -29,5 +29,17 @@
|
|||
<ClInclude Include="filehelper.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="testfiles.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="dynamicmem.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="handle.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="stringutils.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
159
cparser/main.cpp
159
cparser/main.cpp
|
@ -3,11 +3,20 @@
|
|||
#include <string>
|
||||
#include <stdint.h>
|
||||
#include <unordered_map>
|
||||
#include <functional>
|
||||
#include "filehelper.h"
|
||||
#include "stringutils.h"
|
||||
#include "testfiles.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
struct Lexer
|
||||
{
|
||||
explicit Lexer()
|
||||
{
|
||||
SetupKeywordMap();
|
||||
}
|
||||
|
||||
string Input;
|
||||
string ConsumedInput;
|
||||
size_t Index = 0;
|
||||
|
@ -31,17 +40,25 @@ enum Token
|
|||
tok_unsigned, //"unsigned"
|
||||
tok_int, //"int"
|
||||
tok_sizeof, //"sizeof"
|
||||
tok_BYTE, //"BYTE"
|
||||
tok_WORD, //"WORD"
|
||||
tok_DWORD, //"DWORD"
|
||||
tok_ushort, //"ushort"
|
||||
tok_uint, //"uint"
|
||||
tok_byte, //"byte"
|
||||
tok_double, //"double"
|
||||
tok_string, //"string"
|
||||
tok_return, //"return"
|
||||
tok_enum, //"enum"
|
||||
|
||||
//others
|
||||
tok_identifier, //[a-zA-Z][a-zA-Z0-9]
|
||||
tok_identifier, //[a-zA-Z_][a-zA-Z0-9_]
|
||||
tok_number //(0x[0-9a-fA-F]+)|([0-9]+)
|
||||
};
|
||||
|
||||
unordered_map<string, Token> KeywordMap;
|
||||
|
||||
void setup()
|
||||
void SetupKeywordMap()
|
||||
{
|
||||
KeywordMap["typedef"] = tok_typedef;
|
||||
KeywordMap["struct"] = tok_struct;
|
||||
|
@ -49,8 +66,16 @@ void setup()
|
|||
KeywordMap["unsigned"] = tok_unsigned;
|
||||
KeywordMap["int"] = tok_int;
|
||||
KeywordMap["sizeof"] = tok_sizeof;
|
||||
KeywordMap["BYTE"] = tok_BYTE;
|
||||
KeywordMap["WORD"] = tok_WORD;
|
||||
KeywordMap["DWORD"] = tok_DWORD;
|
||||
KeywordMap["byte"] = tok_byte;
|
||||
KeywordMap["ushort"] = tok_ushort;
|
||||
KeywordMap["uint"] = tok_uint;
|
||||
KeywordMap["double"] = tok_double;
|
||||
KeywordMap["string"] = tok_string;
|
||||
KeywordMap["return"] = tok_return;
|
||||
KeywordMap["enum"] = tok_enum;
|
||||
}
|
||||
|
||||
Token ReportError(const String & error)
|
||||
|
@ -59,7 +84,7 @@ Token ReportError(const String & error)
|
|||
return tok_error;
|
||||
}
|
||||
|
||||
String tokString(int tok)
|
||||
String TokString(int tok)
|
||||
{
|
||||
switch (Token(tok))
|
||||
{
|
||||
|
@ -83,7 +108,14 @@ String tokString(int tok)
|
|||
}
|
||||
}
|
||||
|
||||
int readChar()
|
||||
int PeekChar(int distance = 0)
|
||||
{
|
||||
if (Index + distance >= Input.length())
|
||||
return EOF;
|
||||
return Input[Index + distance];
|
||||
}
|
||||
|
||||
int ReadChar()
|
||||
{
|
||||
if (Index == Input.length())
|
||||
return EOF;
|
||||
|
@ -91,18 +123,22 @@ int readChar()
|
|||
return uint8_t(Input[Index++]); //do not sign-extend to support UTF-8
|
||||
}
|
||||
|
||||
int getToken()
|
||||
int GetToken()
|
||||
{
|
||||
//skip whitespace
|
||||
while (isspace(LastChar))
|
||||
LastChar = readChar();
|
||||
LastChar = ReadChar();
|
||||
|
||||
//identifier/keyword
|
||||
if (isalpha(LastChar)) //[a-zA-Z]
|
||||
if (isalpha(LastChar) || LastChar == '_') //[a-zA-Z_]
|
||||
{
|
||||
IdentifierStr = LastChar;
|
||||
while (isalnum(LastChar = readChar())) //[0-9a-zA-Z]
|
||||
LastChar = ReadChar();
|
||||
while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_]
|
||||
{
|
||||
IdentifierStr += LastChar;
|
||||
LastChar = ReadChar();
|
||||
}
|
||||
|
||||
//keywords
|
||||
auto found = KeywordMap.find(IdentifierStr);
|
||||
|
@ -112,18 +148,13 @@ int getToken()
|
|||
return tok_identifier;
|
||||
}
|
||||
|
||||
//(hex) numbers
|
||||
if (isdigit(LastChar)) //[0-9]
|
||||
//hex numbers
|
||||
if (LastChar == '0' && PeekChar() == 'x') //0x
|
||||
{
|
||||
string NumStr;
|
||||
NumStr = LastChar;
|
||||
LastChar = readChar(); //this might not be a digit
|
||||
ReadChar(); //consume the 'x'
|
||||
|
||||
//hexadecimal numbers
|
||||
if (NumStr[0] == '0' && LastChar == 'x') //0x
|
||||
{
|
||||
NumStr = "";
|
||||
while (isxdigit(LastChar = readChar())) //[0-9a-fA-F]*
|
||||
while (isxdigit(LastChar = ReadChar())) //[0-9a-fA-F]*
|
||||
NumStr += LastChar;
|
||||
|
||||
if (!NumStr.length()) //check for error condition
|
||||
|
@ -133,13 +164,13 @@ int getToken()
|
|||
return ReportError("sscanf_s failed on hexadecimal number");
|
||||
return tok_number;
|
||||
}
|
||||
|
||||
//decimal numbers
|
||||
while (isdigit(LastChar)) //[0-9]*
|
||||
if (isdigit(LastChar)) //[0-9]
|
||||
{
|
||||
string NumStr;
|
||||
NumStr = LastChar;
|
||||
|
||||
while (isdigit(LastChar = ReadChar())) //[0-9]*
|
||||
NumStr += LastChar;
|
||||
LastChar = readChar();
|
||||
}
|
||||
|
||||
if (sscanf_s(NumStr.c_str(), "%llu", &NumberVal) != 1)
|
||||
return ReportError("sscanf_s failed on decimal number");
|
||||
|
@ -147,23 +178,19 @@ int getToken()
|
|||
}
|
||||
|
||||
//comments
|
||||
if (LastChar == '/')
|
||||
{
|
||||
LastChar = readChar();
|
||||
|
||||
//line comment
|
||||
if (LastChar == '/')
|
||||
if (LastChar == '/' && PeekChar() == '/') //line comment
|
||||
{
|
||||
do
|
||||
{
|
||||
LastChar = readChar();
|
||||
LastChar = ReadChar();
|
||||
} while (LastChar != EOF && LastChar != '\n');
|
||||
|
||||
if (LastChar == '\n')
|
||||
return getToken(); //interpret the next line
|
||||
return GetToken(); //interpret the next line
|
||||
}
|
||||
else
|
||||
return ReportError("invalid comment");
|
||||
else if (LastChar == '/' && PeekChar() == '*') //block comment
|
||||
{
|
||||
//TODO: implement this
|
||||
}
|
||||
|
||||
//end of file
|
||||
|
@ -172,39 +199,83 @@ int getToken()
|
|||
|
||||
//unknown character
|
||||
auto ThisChar = LastChar;
|
||||
LastChar = readChar();
|
||||
LastChar = ReadChar();
|
||||
return ThisChar;
|
||||
}
|
||||
|
||||
bool ReadInputFile(const char* filename)
|
||||
bool ReadInputFile(const string & filename)
|
||||
{
|
||||
return FileHelper::ReadAllText(filename, Input);
|
||||
}
|
||||
|
||||
void testLex()
|
||||
void TestLex(function<void(const string & line)> lexEnum)
|
||||
{
|
||||
int tok;
|
||||
do
|
||||
{
|
||||
tok = getToken();
|
||||
puts(tokString(tok).c_str());
|
||||
tok = GetToken();
|
||||
lexEnum(TokString(tok));
|
||||
} while (tok != tok_eof && tok != tok_error);
|
||||
}
|
||||
};
|
||||
|
||||
void test()
|
||||
bool TestLexer(const string & filename)
|
||||
{
|
||||
if (!ReadInputFile("test.bt"))
|
||||
Lexer lexer;
|
||||
if (!lexer.ReadInputFile("tests\\" + filename))
|
||||
{
|
||||
puts("failed to read input file");
|
||||
return;
|
||||
printf("failed to read \"%s\"\n", filename.c_str());
|
||||
return false;
|
||||
}
|
||||
setup();
|
||||
testLex();
|
||||
string expected;
|
||||
if (!FileHelper::ReadAllText(filename + ".lextest", expected)) //don't fail tests that we didn't specify yet
|
||||
return true;
|
||||
StringUtils::ReplaceAll(expected, "\r\n", "\n");
|
||||
expected = StringUtils::Trim(expected);
|
||||
string actual;
|
||||
lexer.TestLex([&](const string & line)
|
||||
{
|
||||
actual += line + "\n";
|
||||
});
|
||||
actual = StringUtils::Trim(actual);
|
||||
if (expected == actual)
|
||||
{
|
||||
printf("lexer test for \"%s\" success!\n", filename.c_str());
|
||||
return true;
|
||||
}
|
||||
printf("lexer test for \"%s\" failed\n", filename.c_str());
|
||||
FileHelper::WriteAllText("expected.out", expected);
|
||||
FileHelper::WriteAllText("actual.out", actual);
|
||||
return false;
|
||||
}
|
||||
|
||||
void RunLexerTests()
|
||||
{
|
||||
for (auto file : testFiles)
|
||||
TestLexer(file);
|
||||
}
|
||||
|
||||
bool DebugLexer(const string & filename)
|
||||
{
|
||||
printf("Debugging \"%s\"\n", filename.c_str());
|
||||
Lexer lexer;
|
||||
if (!lexer.ReadInputFile("tests\\" + filename))
|
||||
{
|
||||
printf("failed to read \"%s\"\n", filename.c_str());
|
||||
return false;
|
||||
}
|
||||
lexer.TestLex([](const string & line)
|
||||
{
|
||||
puts(line.c_str());
|
||||
});
|
||||
puts("");
|
||||
return true;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test();
|
||||
DebugLexer(testFiles[1]);
|
||||
RunLexerTests();
|
||||
system("pause");
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
#ifndef TESTFILES_H
#define TESTFILES_H

//Names of the template files the lexer tests iterate over.
//The include guard keeps a second inclusion in the same translation unit
//from redefining the array.
static const char* testFiles[] =
{
    "test.bt",
    "CDATemplate.bt",
    "NetflowVersion5.bt",
    "SHXTemplate.bt",
    "WinhexPosTemplate.bt",
    "Mifare1kTemplate.bt",
    "PALTemplate.bt",
    "GocleverTemplate.bt",
    "OGGTemplate.bt",
    "STLTemplate.bt",
    "SinclairMicrodriveImage.bt",
    "RDBTemplate.bt",
    "DBFTemplate.bt",
    "Mifare4kTemplate.bt",
    "GPTTemplate.bt",
    "SSPTemplate.bt",
    "SHPTemplate.bt",
    "SRecTemplate.bt",
    "FLVTemplate.bt",
    "LUKSTemplate.bt",
    "PCXTemplate.bt",
    "UTMPTemplate.bt",
    "ElTorito.bt",
    "DMPTemplate.bt",
    "OscarItemTemplate.bt",
    "EOTTemplate.bt",
    "ISOTemplate.bt",
    "CLASSTemplate2.bt",
    "EVSBTemplate.bt",
    "BMPTemplate.bt",
    "TGATemplate.bt",
    "TOCTemplate.bt",
    "CABTemplate.bt",
    "RIFFTemplate.bt",
    "AndroidManifestTemplate.bt",
    "InspectorWithMP4DateTime.bt",
    "FAT16Template.bt",
    "PNGTemplate.bt",
    "ICOTemplate.bt",
    "RegistryPolicyFileTemplate.bt",
    "VHDTemplate.bt",
    "ISOBMFTemplate.bt",
    "PCAPTemplate.bt",
    "AVITemplate.bt",
    "ZIPTemplate.bt",
    "CRXTemplate.bt",
    "MIDITemplate.bt",
    "GZipTemplate.bt",
    "GIFTemplate.bt",
    "InspectorDates.bt",
    "WAVTemplate.bt",
    "RegistryHive.bt",
    "EMFTemplate.bt",
    "ROMFS.bt",
    "OrCad3.20a_SCH.bt",
    "MP4Template.bt",
    "CLASSTemplate.bt",
    "WMFTemplate.bt",
    "LNKTemplate.bt",
    "OrCAD3.20a_LIB.bt",
    "PSFTemplate.bt",
    "RARTemplate.bt",
    "PYCTemplate.bt",
    "EXETemplate.bt",
    "PNG12Template.bt",
    "TacxTemplate.bt",
    "MFTRecord.bt",
    "MP3Template.bt",
    "MBRTemplate.bt",
    "WAVTemplateAdv.bt",
    "PDFTemplate.bt",
    "EXETemplate2.bt",
    "RESTemplate.bt",
    "ZIPTemplateAdv.bt",
    "SF2Template.bt",
    "MOBITemplate.bt",
    "MBRTemplateFAT.bt",
    "exFATTemplate.bt",
    "ELFTemplate.new.bt",
    "ELFTemplate.bt",
    "MachOTemplate.bt",
    "PETemplate.bt",
    "EDIDTemplate.bt",
    "GeoTIFTemplate.bt",
    "CLASSTemplate3.bt",
    "CAPTemplate.bt",
    "TIFTemplate.bt",
    "TTFTemplate.bt",
    "JPGTemplate.bt",
    "DEXTemplate.bt",
    "DEXTemplate.new.bt",
    "SWFTemplate.bt",
};

#endif //TESTFILES_H
|
|
@ -0,0 +1,53 @@
|
|||
tok_struct
|
||||
tok_identifier "DBZ"
|
||||
{
|
||||
tok_struct
|
||||
tok_identifier "HEADER"
|
||||
{
|
||||
tok_char
|
||||
tok_identifier "magic"
|
||||
[
|
||||
tok_number 4 (0x4)
|
||||
]
|
||||
;
|
||||
tok_unsigned
|
||||
tok_int
|
||||
tok_identifier "size"
|
||||
;
|
||||
tok_unsigned
|
||||
tok_int
|
||||
tok_identifier "dataStart"
|
||||
;
|
||||
tok_unsigned
|
||||
tok_int
|
||||
tok_identifier "numEntries"
|
||||
;
|
||||
}
|
||||
tok_identifier "header"
|
||||
;
|
||||
tok_char
|
||||
tok_identifier "empty"
|
||||
[
|
||||
tok_identifier "header"
|
||||
.
|
||||
tok_identifier "size"
|
||||
-
|
||||
tok_sizeof
|
||||
(
|
||||
tok_identifier "HEADER"
|
||||
)
|
||||
]
|
||||
;
|
||||
tok_unsigned
|
||||
tok_int
|
||||
tok_identifier "entryOffsets"
|
||||
[
|
||||
tok_identifier "header"
|
||||
.
|
||||
tok_identifier "numEntries"
|
||||
]
|
||||
;
|
||||
}
|
||||
tok_identifier "dbz"
|
||||
;
|
||||
tok_eof
|
Loading…
Reference in New Issue