Integrate x64dbg's hacky parser for types

This commit is contained in:
Duncan Ogilvie 2023-02-05 18:53:45 +01:00
parent fe6952a1e8
commit c7ad8a7f7f
9 changed files with 1285 additions and 19 deletions

View File

@ -23,6 +23,8 @@
<ClCompile Include="main.cpp" /> <ClCompile Include="main.cpp" />
<ClCompile Include="parser.cpp" /> <ClCompile Include="parser.cpp" />
<ClCompile Include="preprocessor.cpp" /> <ClCompile Include="preprocessor.cpp" />
<ClCompile Include="types.cpp" />
<ClCompile Include="typesparser.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="ast.h" /> <ClInclude Include="ast.h" />
@ -32,6 +34,7 @@
<ClInclude Include="operators.h" /> <ClInclude Include="operators.h" />
<ClInclude Include="preprocessor.h" /> <ClInclude Include="preprocessor.h" />
<ClInclude Include="testfiles.h" /> <ClInclude Include="testfiles.h" />
<ClInclude Include="types.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="parser.h" /> <None Include="parser.h" />

View File

@ -27,6 +27,12 @@
<ClCompile Include="preprocessor.cpp"> <ClCompile Include="preprocessor.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="types.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="typesparser.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="testfiles.h"> <ClInclude Include="testfiles.h">
@ -50,6 +56,9 @@
<ClInclude Include="preprocessor.h"> <ClInclude Include="preprocessor.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="types.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="parser.h"> <None Include="parser.h">

View File

@ -7,7 +7,7 @@
namespace StringUtils namespace StringUtils
{ {
static std::string sprintf(const char* format, ...) inline std::string sprintf(const char* format, ...)
{ {
va_list args; va_list args;
va_start(args, format); va_start(args, format);
@ -27,7 +27,7 @@ namespace StringUtils
return std::string(buffer.data()); return std::string(buffer.data());
} }
static std::string Escape(const std::string & s) inline std::string Escape(const std::string & s)
{ {
auto escape = [](unsigned char ch) -> std::string auto escape = [](unsigned char ch) -> std::string
{ {
@ -65,7 +65,7 @@ namespace StringUtils
return escaped; return escaped;
} }
static std::string Utf16ToUtf8(const std::wstring & wstr) inline std::string Utf16ToUtf8(const std::wstring & wstr)
{ {
std::string convertedString; std::string convertedString;
auto requiredSize = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), -1, nullptr, 0, nullptr, nullptr); auto requiredSize = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), -1, nullptr, 0, nullptr, nullptr);
@ -78,7 +78,7 @@ namespace StringUtils
return convertedString; return convertedString;
} }
static std::wstring Utf8ToUtf16(const std::string & str) inline std::wstring Utf8ToUtf16(const std::string & str)
{ {
std::wstring convertedString; std::wstring convertedString;
int requiredSize = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, nullptr, 0); int requiredSize = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, nullptr, 0);
@ -90,11 +90,38 @@ namespace StringUtils
} }
return convertedString; return convertedString;
} }
// Splits `s` on every occurrence of `delim` and stores the non-empty pieces in `elems`.
// `elems` is emptied first; consecutive, leading and trailing delimiters produce no entries.
inline void Split(const std::string& s, char delim, std::vector<std::string>& elems)
{
    elems.clear();
    size_t start = 0;
    while (start < s.length())
    {
        // Locate the end of the current piece (the next delimiter or the end of the string)
        size_t end = s.find(delim, start);
        if (end == std::string::npos)
            end = s.length();
        // Empty pieces are dropped
        if (end > start)
            elems.push_back(s.substr(start, end - start));
        start = end + 1;
    }
}
// Convenience overload: splits `s` on `delim` and returns the non-empty pieces as a new vector.
inline std::vector<std::string> Split(const std::string& s, char delim)
{
std::vector<std::string> elems;
// Delegate to the out-parameter overload, which does the actual splitting
Split(s, delim, elems);
return elems;
}
}; };
namespace FileHelper namespace FileHelper
{ {
static bool ReadAllData(const std::string & fileName, std::vector<uint8_t> & content) inline bool ReadAllData(const std::string & fileName, std::vector<uint8_t> & content)
{ {
auto hFile = CreateFileW(StringUtils::Utf8ToUtf16(fileName).c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, 0, nullptr); auto hFile = CreateFileW(StringUtils::Utf8ToUtf16(fileName).c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, 0, nullptr);
auto result = false; auto result = false;
@ -117,7 +144,7 @@ namespace FileHelper
return result; return result;
} }
static bool WriteAllData(const std::string & fileName, const void* data, size_t size) inline bool WriteAllData(const std::string & fileName, const void* data, size_t size)
{ {
auto hFile = CreateFileW(StringUtils::Utf8ToUtf16(fileName).c_str(), GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, 0, nullptr); auto hFile = CreateFileW(StringUtils::Utf8ToUtf16(fileName).c_str(), GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, 0, nullptr);
auto result = false; auto result = false;
@ -130,7 +157,7 @@ namespace FileHelper
return result; return result;
} }
static bool ReadAllText(const std::string & fileName, std::string & content) inline bool ReadAllText(const std::string & fileName, std::string & content)
{ {
std::vector<unsigned char> data; std::vector<unsigned char> data;
if(!ReadAllData(fileName, data)) if(!ReadAllData(fileName, data))
@ -140,7 +167,7 @@ namespace FileHelper
return true; return true;
} }
static bool WriteAllText(const std::string & fileName, const std::string & content) inline bool WriteAllText(const std::string & fileName, const std::string & content)
{ {
return WriteAllData(fileName, content.c_str(), content.length()); return WriteAllData(fileName, content.c_str(), content.length());
} }

View File

@ -50,7 +50,10 @@ bool Lexer::DoLexing(std::vector<TokenState> & tokens, std::string & error)
{ {
while(true) while(true)
{ {
auto token = getToken(); size_t lineIndex = -1;
auto token = getToken(lineIndex);
if (lineIndex == -1)
__debugbreak();
mState.Token = token; mState.Token = token;
if(token == tok_error) if(token == tok_error)
{ {
@ -58,6 +61,8 @@ bool Lexer::DoLexing(std::vector<TokenState> & tokens, std::string & error)
return false; return false;
} }
tokens.push_back(mState); tokens.push_back(mState);
// Restore the line index from when we started parsing the token
tokens.back().LineIndex = lineIndex;
mState.Clear(); mState.Clear();
if(token == tok_eof) if(token == tok_eof)
break; break;
@ -76,7 +81,8 @@ bool Lexer::Test(const std::function<void(const std::string & line)> & lexEnum,
char newlineText[128] = ""; char newlineText[128] = "";
do do
{ {
tok = getToken(); size_t lineIndex = -1;
tok = getToken(lineIndex);
if(!output) if(!output)
continue; continue;
toks.clear(); toks.clear();
@ -97,7 +103,7 @@ bool Lexer::Test(const std::function<void(const std::string & line)> & lexEnum,
return tok != tok_error; return tok != tok_error;
} }
Lexer::Token Lexer::getToken() Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
{ {
//skip whitespace //skip whitespace
while(isspace(mLastChar)) while(isspace(mLastChar))
@ -111,12 +117,13 @@ Lexer::Token Lexer::getToken()
if(mLastChar == '\\' && (peekChar() == '\r' || peekChar() == '\n')) if(mLastChar == '\\' && (peekChar() == '\r' || peekChar() == '\n'))
{ {
nextChar(); nextChar();
return getToken(); return getToken(tokenLineIndex);
} }
//character literal //character literal
if(mLastChar == '\'') if(mLastChar == '\'')
{ {
tokenLineIndex = mState.LineIndex - 1;
std::string charLit; std::string charLit;
while(true) while(true)
{ {
@ -186,6 +193,7 @@ Lexer::Token Lexer::getToken()
//string literal //string literal
if(mLastChar == '\"') if(mLastChar == '\"')
{ {
tokenLineIndex = mState.LineIndex - 1;
mState.StringLit.clear(); mState.StringLit.clear();
while(true) while(true)
{ {
@ -252,6 +260,7 @@ Lexer::Token Lexer::getToken()
//identifier/keyword //identifier/keyword
if(isalpha(mLastChar) || mLastChar == '_') //[a-zA-Z_] if(isalpha(mLastChar) || mLastChar == '_') //[a-zA-Z_]
{ {
tokenLineIndex = mState.LineIndex - 1;
mState.IdentifierStr = mLastChar; mState.IdentifierStr = mLastChar;
nextChar(); nextChar();
while(isalnum(mLastChar) || mLastChar == '_') //[0-9a-zA-Z_] while(isalnum(mLastChar) || mLastChar == '_') //[0-9a-zA-Z_]
@ -271,6 +280,7 @@ Lexer::Token Lexer::getToken()
//hex numbers //hex numbers
if(mLastChar == '0' && peekChar() == 'x') //0x if(mLastChar == '0' && peekChar() == 'x') //0x
{ {
tokenLineIndex = mState.LineIndex - 1;
nextChar(); //consume the 'x' nextChar(); //consume the 'x'
mNumStr.clear(); mNumStr.clear();
@ -286,8 +296,10 @@ Lexer::Token Lexer::getToken()
mIsHexNumberVal = true; mIsHexNumberVal = true;
return tok_number; return tok_number;
} }
if(isdigit(mLastChar)) //[0-9] if(isdigit(mLastChar)) //[0-9]
{ {
tokenLineIndex = mState.LineIndex - 1;
mNumStr = mLastChar; mNumStr = mLastChar;
while(isdigit(nextChar())) //[0-9]* while(isdigit(nextChar())) //[0-9]*
@ -311,8 +323,9 @@ Lexer::Token Lexer::getToken()
} }
while(!(mLastChar == EOF || mLastChar == '\n')); while(!(mLastChar == EOF || mLastChar == '\n'));
return getToken(); //interpret the next line return getToken(tokenLineIndex); //interpret the next line
} }
if(mLastChar == '/' && peekChar() == '*') //block comment if(mLastChar == '/' && peekChar() == '*') //block comment
{ {
do do
@ -331,9 +344,11 @@ Lexer::Token Lexer::getToken()
nextChar(); nextChar();
nextChar(); nextChar();
return getToken(); //get the next non-comment token return getToken(tokenLineIndex); //get the next non-comment token
} }
tokenLineIndex = mState.LineIndex - 1;
//operators //operators
auto opFound = mOpTripleMap.find(MAKE_OP_TRIPLE(mLastChar, peekChar(), peekChar(1))); auto opFound = mOpTripleMap.find(MAKE_OP_TRIPLE(mLastChar, peekChar(), peekChar(1)));
if(opFound != mOpTripleMap.end()) if(opFound != mOpTripleMap.end())
@ -359,7 +374,10 @@ Lexer::Token Lexer::getToken()
//end of file //end of file
if(mLastChar == EOF) if(mLastChar == EOF)
{
tokenLineIndex = 0;
return tok_eof; return tok_eof;
}
//unknown character //unknown character
return reportError(StringUtils::sprintf("unexpected character \'%c\'", mLastChar)); return reportError(StringUtils::sprintf("unexpected character \'%c\'", mLastChar));

View File

@ -93,5 +93,5 @@ private:
bool checkString(const std::string & expected); bool checkString(const std::string & expected);
int nextChar(); int nextChar();
void signalNewLine(); void signalNewLine();
Token getToken(); Token getToken(size_t & tokenLineIndex);
}; };

View File

@ -5,6 +5,7 @@
#include "parser.h" #include "parser.h"
#include "helpers.h" #include "helpers.h"
#include "preprocessor.h" #include "preprocessor.h"
#include "types.h"
bool TestLexer(Lexer & lexer, const std::string & filename) bool TestLexer(Lexer & lexer, const std::string & filename)
{ {
@ -106,13 +107,17 @@ bool DebugParser(const std::string & filename)
return false; return false;
} }
Parser parser; FileHelper::WriteAllText("tests\\" + filename + ".pp.h", ppData);
std::string error;
if(!parser.ParseString(ppData, error)) std::vector<std::string> errors;
if (!ParseTypes(ppData, filename, errors))
{ {
printf("ParseFile failed: %s\n", error.c_str()); puts("Failed to parse types:");
for (const auto& error : errors)
puts(error.c_str());
return false; return false;
} }
puts("ParseFile success!"); puts("ParseFile success!");
return true; return true;
} }

717
btparser/types.cpp Normal file
View File

@ -0,0 +1,717 @@
#include "types.h"
#include "helpers.h"
#include <algorithm>
// Bring the Types namespace into this translation unit so TypeManager, Member, etc. can be named unqualified
using namespace Types;
// File-local TypeManager instance that the global functions in this file forward to
static TypeManager typeManager;
// Lock macros are defined as empty here, so every EXCLUSIVE_ACQUIRE/SHARED_ACQUIRE use in this file expands to nothing
#define EXCLUSIVE_ACQUIRE(x)
#define SHARED_ACQUIRE(x)
// Registers the built-in primitive types together with all of their aliases.
// Built-ins are added with an empty owner, which is what protects them from Clear() and RemoveType().
TypeManager::TypeManager()
{
    // Records the size of `primitive` and registers every comma-separated alias in `aliases` for it
    auto registerPrimitive = [this](const std::string & aliases, Primitive primitive, int size)
    {
        primitivesizes[primitive] = size;
        for(const auto & alias : StringUtils::Split(aliases, ','))
            addType("", primitive, alias);
    };
    registerPrimitive("int8_t,int8,char,byte,bool,signed char", Int8, sizeof(char));
    registerPrimitive("uint8_t,uint8,uchar,unsigned char,ubyte", Uint8, sizeof(unsigned char));
    registerPrimitive("int16_t,int16,wchar_t,char16_t,short", Int16, sizeof(short));
    // The unsigned 16-bit aliases must map to Uint16 (they were previously registered as Int16,
    // which left primitivesizes[Uint16] unset and reported the wrong primitive for these names)
    registerPrimitive("uint16_t,uint16,ushort,unsigned short", Uint16, sizeof(unsigned short));
    registerPrimitive("int32_t,int32,int,long", Int32, sizeof(int));
    registerPrimitive("uint32_t,uint32,unsigned int,unsigned long", Uint32, sizeof(unsigned int));
    registerPrimitive("int64_t,int64,long long", Int64, sizeof(long long));
    registerPrimitive("uint64_t,uint64,unsigned long long", Uint64, sizeof(unsigned long long));
    registerPrimitive("dsint", Dsint, sizeof(void*));
    registerPrimitive("duint,size_t", Duint, sizeof(void*));
    registerPrimitive("float", Float, sizeof(float));
    registerPrimitive("double", Double, sizeof(double));
    registerPrimitive("ptr,void*", Pointer, sizeof(void*));
    registerPrimitive("char*,const char*", PtrString, sizeof(char*));
    registerPrimitive("wchar_t*,const wchar_t*", PtrWString, sizeof(wchar_t*));
}
// Registers `name` as an alias of the already registered type `type` on behalf of `owner`.
// Returns false when `owner` is empty, `type` is not in the type table, or `name` cannot be added.
bool TypeManager::AddType(const std::string & owner, const std::string & type, const std::string & name)
{
    if(!owner.empty())
    {
        // Gives validPtr a chance to register `type` when it is a pointer to a known type; the result is deliberately ignored
        validPtr(type);
        const auto base = types.find(type);
        if(base != types.end())
            return addType(owner, base->second.primitive, name);
    }
    return false;
}
bool TypeManager::AddStruct(const std::string & owner, const std::string & name)
{
StructUnion s;
s.name = name;
s.owner = owner;
return addStructUnion(s);
}
bool TypeManager::AddUnion(const std::string & owner, const std::string & name)
{
StructUnion u;
u.owner = owner;
u.name = name;
u.isunion = true;
return addStructUnion(u);
}
// Adds the member `name` of type `type` to the struct/union `parent`.
// arrsize: number of array elements, 0 for a non-array member (negative values are rejected).
// offset: a value >= 0 is treated as a user-defined offset; a negative value skips the offset handling.
// Returns false when the type is unknown (and is not a pointer to a known type), the parent does not exist,
// name/type is empty, type equals parent, a member with the same name already exists, or the
// user-defined offset lies before the current end of the parent.
bool TypeManager::AddMember(const std::string & parent, const std::string & type, const std::string & name, int arrsize, int offset)
{
// validPtr registers pointer types on demand, so a successful call makes isDefined(type) true below
if(!isDefined(type) && !validPtr(type))
return false;
auto found = structs.find(parent);
if(arrsize < 0 || found == structs.end() || !isDefined(type) || name.empty() || type.empty() || type == parent)
return false;
auto & s = found->second;
// Member names must be unique within the parent
for(const auto & member : s.members)
if(member.name == name)
return false;
// Total size of the member: element size times element count for arrays
auto typeSize = Sizeof(type);
if(arrsize)
typeSize *= arrsize;
Member m;
m.name = name;
m.arrsize = arrsize;
m.type = type;
m.offset = offset;
if(offset >= 0) //user-defined offset
{
// The requested offset may not lie inside the part of the parent that is already occupied
if(offset < s.size)
return false;
if(offset > s.size)
{
// Fill the gap up to the requested offset with a char array named "padding<N>" (N = gap size)
Member pad;
pad.type = "char";
pad.arrsize = offset - s.size;
char padname[32] = "";
sprintf_s(padname, "padding%d", pad.arrsize);
pad.name = padname;
s.members.push_back(pad);
s.size += pad.arrsize;
}
}
s.members.push_back(m);
if(s.isunion)
{
// A union is as large as its largest member
if(typeSize > s.size)
s.size = typeSize;
}
else
{
// A struct grows by the size of each appended member
s.size += typeSize;
}
return true;
}
// Adds a member to the struct/union remembered in `laststruct` (set by the most recent addStructUnion call).
bool TypeManager::AppendMember(const std::string & type, const std::string & name, int arrsize, int offset)
{
return AddMember(laststruct, type, name, arrsize, offset);
}
bool TypeManager::AddFunction(const std::string & owner, const std::string & name, const std::string & rettype, CallingConvention callconv, bool noreturn)
{
auto found = functions.find(name);
if(found != functions.end() || name.empty() || owner.empty())
return false;
lastfunction = name;
Function f;
f.owner = owner;
f.name = name;
if(rettype != "void" && !isDefined(rettype) && !validPtr(rettype))
return false;
f.rettype = rettype;
f.callconv = callconv;
f.noreturn = noreturn;
functions.insert({f.name, f});
return true;
}
bool TypeManager::AddArg(const std::string & function, const std::string & type, const std::string & name)
{
if(!isDefined(type) && !validPtr(type))
return false;
auto found = functions.find(function);
if(found == functions.end() || function.empty() || name.empty() || !isDefined(type))
return false;
lastfunction = function;
Member arg;
arg.name = name;
arg.type = type;
found->second.args.push_back(arg);
return true;
}
// Adds an argument to the function remembered in `lastfunction` (set by AddFunction/AddArg).
bool TypeManager::AppendArg(const std::string & type, const std::string & name)
{
return AddArg(lastfunction, type, name);
}
// Returns the size in bytes of `type`, looked up as a type, then a struct/union, then a function.
// A function name yields the registered size of the Pointer primitive (sizeof(void*) if none is
// registered); an unknown name yields 0.
int TypeManager::Sizeof(const std::string & type) const
{
    const auto typeEntry = types.find(type);
    if(typeEntry != types.end())
        return typeEntry->second.size;

    const auto structEntry = structs.find(type);
    if(structEntry != structs.end())
        return structEntry->second.size;

    if(functions.find(type) == functions.end())
        return 0;

    const auto pointerSize = primitivesizes.find(Pointer);
    return pointerSize != primitivesizes.end() ? pointerSize->second : static_cast<int>(sizeof(void*));
}
bool TypeManager::Visit(const std::string & type, const std::string & name, Visitor & visitor) const
{
Member m;
m.name = name;
m.type = type;
return visitMember(m, visitor);
}
// Erases entries from `map` by owner: entries whose owner is empty are always kept;
// of the remaining entries, all are erased when `owner` is empty, otherwise only those owned by `owner`.
template<typename K, typename V>
static void filterOwnerMap(std::unordered_map<K, V> & map, const std::string & owner)
{
    const bool removeAllOwned = owner.empty();
    auto it = map.begin();
    while(it != map.end())
    {
        const std::string & entryOwner = it->second.owner;
        const bool owned = !entryOwner.empty();
        if(owned && (removeAllOwned || entryOwner == owner))
            it = map.erase(it); // erase returns the iterator following the removed element
        else
            ++it;
    }
}
// Forgets the "last struct/function" context and removes owned types, structs/unions and functions.
// An empty `owner` removes everything that has a non-empty owner; entries with an empty owner are kept.
void TypeManager::Clear(const std::string & owner)
{
    lastfunction.clear();
    laststruct.clear();
    filterOwnerMap(functions, owner);
    filterOwnerMap(structs, owner);
    filterOwnerMap(types, owner);
}
// Erases the entry keyed by `type` from `map`, unless it is missing or has an empty owner.
// Returns true only when an entry was actually erased.
template<typename K, typename V>
static bool removeType(std::unordered_map<K, V> & map, const std::string & type)
{
    const auto entry = map.find(type);
    const bool removable = entry != map.end() && !entry->second.owner.empty();
    if(removable)
        map.erase(entry);
    return removable;
}
// Removes `type` from the first table that holds it as an owned entry (types, then structs/unions, then functions).
bool TypeManager::RemoveType(const std::string & type)
{
    if(removeType(types, type))
        return true;
    if(removeType(structs, type))
        return true;
    return removeType(functions, type);
}
// Kind label for a struct/union entry: "union" or "struct".
static std::string getKind(const StructUnion & su)
{
    if(su.isunion)
        return "union";
    return "struct";
}
// Kind label for an entry of the type table: always "typedef".
static std::string getKind(const Type & t)
{
    return std::string("typedef");
}
// Kind label for a function entry: always "function".
static std::string getKind(const Function & f)
{
    return std::string("function");
}
template<typename K, typename V>
static void enumType(const std::unordered_map<K, V> & map, std::vector<TypeManager::Summary> & types)
{
for(auto i = map.begin(); i != map.end(); ++i)
{
TypeManager::Summary s;
s.kind = getKind(i->second);
s.name = i->second.name;
s.owner = i->second.owner;
s.size = SizeofType(s.name);
types.push_back(s);
}
}
void TypeManager::Enum(std::vector<Summary> & typeList) const
{
typeList.clear();
enumType(types, typeList);
enumType(structs, typeList);
enumType(functions, typeList);
//nasty hacks to sort in a nice way
std::sort(typeList.begin(), typeList.end(), [](const Summary & a, const Summary & b)
{
auto kindInt = [](const std::string & kind)
{
if(kind == "typedef")
return 0;
if(kind == "struct")
return 1;
if(kind == "union")
return 2;
if(kind == "function")
return 3;
__debugbreak();
return 4;
};
if(a.owner < b.owner)
return true;
else if(a.owner > b.owner)
return false;
auto ka = kindInt(a.kind), kb = kindInt(b.kind);
if(ka < kb)
return true;
else if(ka > kb)
return false;
if(a.name < b.name)
return true;
else if(a.name > b.name)
return false;
return a.size < b.size;
});
}
// Returns "struct" or "union" when `pointto` names a registered struct/union, otherwise an empty string.
std::string Types::TypeManager::StructUnionPtrType(const std::string & pointto) const
{
    const auto target = structs.find(pointto);
    return target != structs.end() ? getKind(target->second) : std::string();
}
// True when `map` holds an entry for key `k`.
template<typename K, typename V>
static bool mapContains(const std::unordered_map<K, V> & map, const K & k)
{
    return map.count(k) != 0;
}
// True when `id` names a registered type or struct/union (functions are not considered).
bool TypeManager::isDefined(const std::string & id) const
{
    if(mapContains(types, id))
        return true;
    return mapContains(structs, id);
}
// Checks whether `id` is a pointer ("T*") to a known type and, if so, registers `id` as a Pointer type.
// The pointee may itself be a not-yet-registered pointer; it is then registered recursively.
// The new pointer type inherits the owner of its pointee ("ptr" when no owner can be found).
// Returns false when `id` is empty, does not end in '*', its pointee is unknown, or `id` could not
// be added (for example because it is already registered).
bool TypeManager::validPtr(const std::string & id)
{
    // Guard against an empty identifier: indexing id[id.length() - 1] on it would read out of bounds
    if(id.empty() || id.back() != '*')
        return false;
    auto type = id.substr(0, id.length() - 1);
    if(!isDefined(type) && !validPtr(type))
        return false;
    std::string owner("ptr");
    auto foundT = types.find(type);
    if(foundT != types.end())
        owner = foundT->second.owner;
    auto foundS = structs.find(type);
    if(foundS != structs.end())
        owner = foundS->second.owner;
    return addType(owner, Pointer, id, type);
}
// Stores the struct/union `s` unless its owner or name is empty or the name is already defined.
// `laststruct` is updated to s.name before validation, so it changes even when the call fails.
bool TypeManager::addStructUnion(const StructUnion & s)
{
    laststruct = s.name;
    const bool rejected = s.owner.empty() || s.name.empty() || isDefined(s.name);
    if(!rejected)
        structs.insert({s.name, s});
    return !rejected;
}
// Stores the type `t` unless its name is empty or already defined as a type or struct/union.
bool TypeManager::addType(const Type & t)
{
    const bool usable = !t.name.empty() && !isDefined(t.name);
    if(usable)
        types.insert({t.name, t});
    return usable;
}
bool TypeManager::addType(const std::string & owner, Primitive primitive, const std::string & name, const std::string & pointto)
{
if(name.empty() || isDefined(name))
return false;
Type t;
t.owner = owner;
t.name = name;
t.primitive = primitive;
t.size = primitivesizes[primitive];
t.pointto = pointto;
return addType(t);
}
// Recursively walks the member `root`, reporting each node to `visitor`.
// Entries of the type table are reported with visitType, or with visitPtr when they point to another type;
// structs/unions are reported with visitStructUnion and then each of their members is walked.
// Returns false when a visitor callback (other than visitPtr) returns false, when a pointee is not
// defined, or when root.type is neither a registered type nor a struct/union.
bool TypeManager::visitMember(const Member & root, Visitor & visitor) const
{
auto foundT = types.find(root.type);
if(foundT != types.end())
{
const auto & t = foundT->second;
// A non-empty pointto marks a pointer type
if(!t.pointto.empty())
{
if(!isDefined(t.pointto))
return false;
if(visitor.visitPtr(root, t)) //allow the visitor to bail out
{
// Follow the pointer: the pointee is visited as a member named "*<name>"
if(!Visit(t.pointto, "*" + root.name, visitor))
return false;
return visitor.visitBack(root);
}
// The visitor declined to follow the pointer; this is not treated as a failure
return true;
}
return visitor.visitType(root, t);
}
auto foundS = structs.find(root.type);
if(foundS != structs.end())
{
const auto & s = foundS->second;
if(!visitor.visitStructUnion(root, s))
return false;
for(const auto & child : s.members)
{
if(child.arrsize)
{
// Arrays: announce the array, visit the element member once per element, then close it
if(!visitor.visitArray(child))
return false;
for(auto i = 0; i < child.arrsize; i++)
if(!visitMember(child, visitor))
return false;
if(!visitor.visitBack(child))
return false;
}
else if(!visitMember(child, visitor))
return false;
}
// Close the struct/union that was opened with visitStructUnion
return visitor.visitBack(root);
}
return false;
}
// Global entry point: forwards to AddType on the file-local TypeManager instance.
bool AddType(const std::string & owner, const std::string & type, const std::string & name)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.AddType(owner, type, name);
}
// Global entry point: forwards to AddStruct on the file-local TypeManager instance.
bool AddStruct(const std::string & owner, const std::string & name)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.AddStruct(owner, name);
}
// Global entry point: forwards to AddUnion on the file-local TypeManager instance.
bool AddUnion(const std::string & owner, const std::string & name)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.AddUnion(owner, name);
}
// Global entry point: forwards to AddMember on the file-local TypeManager instance.
bool AddMember(const std::string & parent, const std::string & type, const std::string & name, int arrsize, int offset)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.AddMember(parent, type, name, arrsize, offset);
}
// Global entry point: forwards to AppendMember on the file-local TypeManager instance.
bool AppendMember(const std::string & type, const std::string & name, int arrsize, int offset)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.AppendMember(type, name, arrsize, offset);
}
// Global entry point: forwards to AddFunction on the file-local TypeManager instance.
bool AddFunction(const std::string & owner, const std::string & name, const std::string & rettype, Types::CallingConvention callconv, bool noreturn)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.AddFunction(owner, name, rettype, callconv, noreturn);
}
// Global entry point: forwards to AddArg on the file-local TypeManager instance.
bool AddArg(const std::string & function, const std::string & type, const std::string & name)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.AddArg(function, type, name);
}
// Global entry point: forwards to AppendArg on the file-local TypeManager instance.
bool AppendArg(const std::string & type, const std::string & name)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.AppendArg(type, name);
}
// Global entry point: returns TypeManager::Sizeof(type) from the file-local TypeManager instance.
int SizeofType(const std::string & type)
{
SHARED_ACQUIRE(LockTypeManager);
return typeManager.Sizeof(type);
}
// Global entry point: forwards to Visit on the file-local TypeManager instance.
bool VisitType(const std::string & type, const std::string & name, Types::TypeManager::Visitor & visitor)
{
SHARED_ACQUIRE(LockTypeManager);
return typeManager.Visit(type, name, visitor);
}
// Global entry point: forwards to Clear on the file-local TypeManager instance.
void ClearTypes(const std::string & owner)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.Clear(owner);
}
// Global entry point: forwards to RemoveType on the file-local TypeManager instance.
bool RemoveType(const std::string & type)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
return typeManager.RemoveType(type);
}
// Global entry point: forwards to Enum on the file-local TypeManager instance, which fills `typeList`.
void EnumTypes(std::vector<Types::TypeManager::Summary> & typeList)
{
SHARED_ACQUIRE(LockTypeManager);
return typeManager.Enum(typeList);
}
#if 0
int json_default_int(const JSON object, const char* key, int defaultVal)
{
auto jint = json_object_get(object, key);
if(jint && json_is_integer(jint))
return int(json_integer_value(jint));
return defaultVal;
}
static void loadTypes(const JSON troot, std::vector<Member> & types)
{
if(!troot)
return;
size_t i;
JSON vali;
Member curType;
json_array_foreach(troot, i, vali)
{
auto type = json_string_value(json_object_get(vali, "type"));
auto name = json_string_value(json_object_get(vali, "name"));
if(!type || !*type || !name || !*name)
continue;
curType.type = type;
curType.name = name;
types.push_back(curType);
}
}
static void loadStructUnions(const JSON suroot, bool isunion, std::vector<StructUnion> & structUnions)
{
if(!suroot)
return;
size_t i;
JSON vali;
StructUnion curSu;
curSu.isunion = isunion;
json_array_foreach(suroot, i, vali)
{
auto suname = json_string_value(json_object_get(vali, "name"));
if(!suname || !*suname)
continue;
curSu.name = suname;
curSu.members.clear();
auto members = json_object_get(vali, "members");
size_t j;
JSON valj;
Member curMember;
json_array_foreach(members, j, valj)
{
auto type = json_string_value(json_object_get(valj, "type"));
auto name = json_string_value(json_object_get(valj, "name"));
if(!type || !*type || !name || !*name)
continue;
curMember.type = type;
curMember.name = name;
curMember.arrsize = json_default_int(valj, "arrsize", 0);
curMember.offset = json_default_int(valj, "offset", -1);
curSu.members.push_back(curMember);
}
structUnions.push_back(curSu);
}
}
static void loadFunctions(const JSON froot, std::vector<Function> & functions)
{
if(!froot)
return;
size_t i;
JSON vali;
Function curFunction;
json_array_foreach(froot, i, vali)
{
auto rettype = json_string_value(json_object_get(vali, "rettype"));
auto fname = json_string_value(json_object_get(vali, "name"));
if(!rettype || !*rettype || !fname || !*fname)
continue;
curFunction.rettype = rettype;
curFunction.name = fname;
curFunction.args.clear();
auto callconv = json_string_value(json_object_get(vali, "callconv"));
curFunction.noreturn = json_boolean_value(json_object_get(vali, "noreturn"));
if(scmp(callconv, "cdecl"))
curFunction.callconv = Cdecl;
else if(scmp(callconv, "stdcall"))
curFunction.callconv = Stdcall;
else if(scmp(callconv, "thiscall"))
curFunction.callconv = Thiscall;
else if(scmp(callconv, "delphi"))
curFunction.callconv = Delphi;
else
curFunction.callconv = Cdecl;
auto args = json_object_get(vali, "args");
size_t j;
JSON valj;
Member curArg;
json_array_foreach(args, j, valj)
{
auto type = json_string_value(json_object_get(valj, "type"));
auto name = json_string_value(json_object_get(valj, "name"));
if(!type || !*type || !name || !*name)
continue;
curArg.type = type;
curArg.name = name;
curFunction.args.push_back(curArg);
}
functions.push_back(curFunction);
}
}
#endif
// Shims used by LoadModel below: dprintf maps straight to printf and
// QT_TRANSLATE_NOOP(ctx, s) evaluates to the untranslated string `s`
#define dprintf printf
#define QT_TRANSLATE_NOOP(ctx, s) s
// Registers everything described by `model` in the file-local TypeManager on behalf of `owner`.
// Registration is staged (struct/union shells, typedefs, function shells, then members and arguments)
// so that members and arguments can refer to structs, unions and typedefs declared anywhere in the model.
// Failures are printed via dprintf and do not abort the load; `model` is modified in place: the name of a
// struct/union/function that failed to register is cleared so its members/arguments are skipped later.
// Unlike the global wrapper functions in this file, typeManager is used directly, without the lock macros.
void LoadModel(const std::string & owner, Model & model)
{
//Add all base struct/union types first to avoid errors later
for(auto & su : model.structUnions)
{
auto success = su.isunion ? typeManager.AddUnion(owner, su.name) : typeManager.AddStruct(owner, su.name);
if(!success)
{
//TODO properly handle errors
dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add %s %s;\n"), su.isunion ? "union" : "struct", su.name.c_str());
su.name.clear(); //signal error
}
}
//Add simple typedefs
for(auto & type : model.types)
{
auto success = typeManager.AddType(owner, type.type, type.name);
if(!success)
{
//TODO properly handle errors
dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add typedef %s %s;\n"), type.type.c_str(), type.name.c_str());
}
}
//Add enums
// Enums are not handled yet: any entry in model.enums breaks into the debugger
for (auto& enumm : model.enums)
{
__debugbreak();
}
//Add base function types to avoid errors later
for(auto & function : model.functions)
{
auto success = typeManager.AddFunction(owner, function.name, function.rettype, function.callconv, function.noreturn);
if(!success)
{
//TODO properly handle errors
dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add function %s %s()\n"), function.rettype.c_str(), function.name.c_str());
function.name.clear(); //signal error
}
}
//Add struct/union members
for(auto & su : model.structUnions)
{
if(su.name.empty()) //skip error-signalled structs/unions
continue;
for(auto & member : su.members)
{
auto success = typeManager.AddMember(su.name, member.type, member.name, member.arrsize, member.offset);
if(!success)
{
//TODO properly handle errors
dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add member %s %s.%s;\n"), member.type.c_str(), su.name.c_str(), member.name.c_str());
}
}
}
//Add function arguments
for(auto & function : model.functions)
{
if(function.name.empty()) //skip error-signalled functions
continue;
for(auto & arg : function.args)
{
auto success = typeManager.AddArg(function.name, arg.type, arg.name);
if(!success)
{
//TODO properly handle errors
dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add argument %s %s.%s;\n"), arg.type.c_str(), function.name.c_str(), arg.name.c_str());
}
}
}
}
#if 0
bool LoadTypesJson(const std::string & json, const std::string & owner)
{
EXCLUSIVE_ACQUIRE(LockTypeManager);
auto root = json_loads(json.c_str(), 0, 0);
if(root)
{
Model model;
loadTypes(json_object_get(root, "types"), model.types);
loadTypes(json_object_get(root, ArchValue("types32", "types64")), model.types);
loadStructUnions(json_object_get(root, "structs"), false, model.structUnions);
loadStructUnions(json_object_get(root, ArchValue("structs32", "structs64")), false, model.structUnions);
loadStructUnions(json_object_get(root, "unions"), true, model.structUnions);
loadStructUnions(json_object_get(root, ArchValue("unions32", "unions64")), true, model.structUnions);
loadFunctions(json_object_get(root, "functions"), model.functions);
loadFunctions(json_object_get(root, ArchValue("functions32", "functions64")), model.functions);
LoadModel(owner, model);
// Free root
json_decref(root);
}
else
return false;
return true;
}
bool LoadTypesFile(const std::string & path, const std::string & owner)
{
std::string json;
if(!FileHelper::ReadAllText(path, json))
return false;
return LoadTypesJson(json, owner);
}
#endif
// Global entry point: forwards to StructUnionPtrType on the file-local TypeManager instance
// (no lock macro is used here, unlike the other global wrappers in this file).
std::string StructUnionPtrType(const std::string & pointto)
{
return typeManager.StructUnionPtrType(pointto);
}

164
btparser/types.h Normal file
View File

@ -0,0 +1,164 @@
#pragma once
#include <string>
#include <vector>
#include <unordered_map>
namespace Types
{
enum Primitive
{
Void,
Int8,
Uint8,
Int16,
Uint16,
Int32,
Uint32,
Int64,
Uint64,
Dsint,
Duint,
Float,
Double,
Pointer,
PtrString, //char* (null-terminated)
PtrWString //wchar_t* (null-terminated)
};
struct Type
{
std::string owner; //Type owner
std::string name; //Type identifier.
std::string pointto; //Type identifier of *Type
Primitive primitive = Void; //Primitive type.
int size = 0; //Size in bytes.
};
struct Member
{
std::string name; //Member identifier
std::string type; //Type.name
int arrsize = 0; //Number of elements if Member is an array
int offset = -1; //Member offset (only stored for reference)
};
struct StructUnion
{
std::string owner; //StructUnion owner
std::string name; //StructUnion identifier
std::vector<Member> members; //StructUnion members
bool isunion = false; //Is this a union?
int size = 0;
};
struct EnumValue
{
std::string name;
uint64_t value = 0;
};
struct Enum
{
std::string owner; // Enum owner
std::string name; // Enum name
std::vector<EnumValue> values; // Enum values
Type type; // Enum value type
};
enum CallingConvention
{
Cdecl,
Stdcall,
Thiscall,
Delphi
};
struct Function
{
std::string owner; //Function owner
std::string name; //Function identifier
std::string rettype; //Function return type
CallingConvention callconv = Cdecl; //Function calling convention
bool noreturn = false; //Function does not return (ExitProcess, _exit)
std::vector<Member> args; //Function arguments
};
// Registry for types, structs/unions and functions, with a visitor-based traversal.
// Only the declarations are visible here; see the implementation file for exact semantics.
struct TypeManager
{
    // Callback interface passed to Visit().
    // NOTE(review): the meaning of each bool result is decided by the implementation
    // (presumably false aborts the traversal) - confirm before relying on it.
    struct Visitor
    {
        virtual ~Visitor() = default;
        virtual bool visitType(const Member& member, const Type& type) = 0;
        virtual bool visitStructUnion(const Member& member, const StructUnion& type) = 0;
        virtual bool visitArray(const Member& member) = 0;
        virtual bool visitPtr(const Member& member, const Type& type) = 0;
        virtual bool visitBack(const Member& member) = 0;
    };

    // One entry of the list filled in by Enum().
    struct Summary
    {
        std::string kind;
        std::string name;
        std::string owner;
        int size{0};
    };

    explicit TypeManager();

    // --- Registration of types and struct/union definitions ---
    bool AddType(const std::string& owner, const std::string& type, const std::string& name);
    bool AddStruct(const std::string& owner, const std::string& name);
    bool AddUnion(const std::string& owner, const std::string& name);

    // --- Struct/union members ---
    // AddMember names the parent explicitly; AppendMember takes no parent
    // (presumably it targets `laststruct` - confirm in the implementation).
    bool AddMember(const std::string& parent, const std::string& type, const std::string& name, int arrsize = 0, int offset = -1);
    bool AppendMember(const std::string& type, const std::string& name, int arrsize = 0, int offset = -1);

    // --- Functions and their arguments ---
    // AddArg names the function explicitly; AppendArg takes no function
    // (presumably it targets `lastfunction` - confirm in the implementation).
    bool AddFunction(const std::string& owner, const std::string& name, const std::string& rettype, CallingConvention callconv = Cdecl, bool noreturn = false);
    bool AddArg(const std::string& function, const std::string& type, const std::string& name);
    bool AppendArg(const std::string& type, const std::string& name);

    // --- Queries and maintenance ---
    int Sizeof(const std::string& type) const;
    bool Visit(const std::string& type, const std::string& name, Visitor& visitor) const;
    void Clear(const std::string& owner = "");
    bool RemoveType(const std::string& type);
    void Enum(std::vector<Summary>& typeList) const;
    std::string StructUnionPtrType(const std::string& pointto) const;

private:
    std::unordered_map<Primitive, int> primitivesizes;    // Size per primitive kind
    std::unordered_map<std::string, Type> types;          // Types by name
    std::unordered_map<std::string, StructUnion> structs; // Structs/unions by name
    std::unordered_map<std::string, Function> functions;  // Functions by name
    std::string laststruct;                               // NOTE(review): presumably the struct/union added last
    std::string lastfunction;                             // NOTE(review): presumably the function added last

    bool isDefined(const std::string& id) const;
    bool validPtr(const std::string& id);
    bool addStructUnion(const StructUnion& s);
    bool addType(const std::string& owner, Primitive primitive, const std::string& name, const std::string& pointto = "");
    bool addType(const Type& t);
    bool visitMember(const Member& root, Visitor& visitor) const;
};
// Plain container for a batch of parsed declarations.
// ParseTypes() builds one of these and hands it to LoadModel().
struct Model
{
    std::vector<Member> types;             // Typedef-like entries (type + new name)
    std::vector<Enum> enums;               // Parsed enums
    std::vector<StructUnion> structUnions; // Parsed structs and unions
    std::vector<Function> functions;       // Function descriptions
};
};
// Free-function API declared outside namespace Types (hence the Types:: qualifiers).
// NOTE(review): most names and signatures mirror Types::TypeManager's public methods,
// so these presumably forward to a shared TypeManager instance - confirm in the
// implementation file.

// Type and struct/union registration (mirrors TypeManager::AddType/AddStruct/AddUnion).
bool AddType(const std::string & owner, const std::string & type, const std::string & name);
bool AddStruct(const std::string & owner, const std::string & name);
bool AddUnion(const std::string & owner, const std::string & name);
// Member registration: AddMember names the parent, AppendMember does not take one.
bool AddMember(const std::string & parent, const std::string & type, const std::string & name, int arrsize = 0, int offset = -1);
bool AppendMember(const std::string & type, const std::string & name, int arrsize = 0, int offset = -1);
// Function registration: AddArg names the function, AppendArg does not take one.
bool AddFunction(const std::string & owner, const std::string & name, const std::string & rettype, Types::CallingConvention callconv = Types::Cdecl, bool noreturn = false);
bool AddArg(const std::string & function, const std::string & type, const std::string & name);
bool AppendArg(const std::string & type, const std::string & name);
// Queries and maintenance (mirror TypeManager::Sizeof/Visit/Clear/RemoveType/Enum).
int SizeofType(const std::string & type);
bool VisitType(const std::string & type, const std::string & name, Types::TypeManager::Visitor & visitor);
void ClearTypes(const std::string & owner = "");
bool RemoveType(const std::string & type);
void EnumTypes(std::vector<Types::TypeManager::Summary> & typeList);
// Loaders. NOTE(review): LoadTypesJson presumably takes JSON text and LoadTypesFile a
// file path; neither implementation is visible here.
bool LoadTypesJson(const std::string & json, const std::string & owner);
bool LoadTypesFile(const std::string & path, const std::string & owner);
// Parses typedef/struct/union/enum declarations from `parse`; failure messages are
// appended to `errors` and false is returned.
bool ParseTypes(const std::string & parse, const std::string & owner, std::vector<std::string> & errors);
std::string StructUnionPtrType(const std::string & pointto);

323
btparser/typesparser.cpp Normal file
View File

@ -0,0 +1,323 @@
#include "types.h"
#include "helpers.h"
using namespace Types;
#include "lexer.h"
void LoadModel(const std::string& owner, Model& model);
bool ParseTypes(const std::string& parse, const std::string& owner, std::vector<std::string>& errors)
{
Lexer lexer;
lexer.SetInputData(parse);
std::vector<Lexer::TokenState> tokens;
size_t index = 0;
auto getToken = [&](size_t i) -> Lexer::TokenState&
{
if (index >= tokens.size() - 1)
i = tokens.size() - 1;
return tokens[i];
};
auto curToken = [&]() -> Lexer::TokenState&
{
return getToken(index);
};
auto isToken = [&](Lexer::Token token)
{
return getToken(index).Token == token;
};
auto isTokenList = [&](std::initializer_list<Lexer::Token> il)
{
size_t i = 0;
for (auto l : il)
if (getToken(index + i++).Token != l)
return false;
return true;
};
std::string error;
if (!lexer.DoLexing(tokens, error))
{
errors.push_back(error);
return false;
}
Model model;
auto errLine = [&](const Lexer::TokenState& token, const std::string& message)
{
errors.push_back(StringUtils::sprintf("[line %zu:%zu] %s", token.CurLine + 1, token.LineIndex, message.c_str()));
};
auto eatSemic = [&]()
{
while (curToken().Token == Lexer::tok_semic)
index++;
};
auto parseMember = [&](StructUnion& su)
{
std::vector<Lexer::TokenState> memToks;
while (!isToken(Lexer::tok_semic))
{
if (isToken(Lexer::tok_eof))
{
errLine(curToken(), "unexpected eof in member");
return false;
}
memToks.push_back(curToken());
index++;
}
if (memToks.empty())
{
errLine(curToken(), "unexpected ; in member");
return false;
}
eatSemic();
if (memToks.size() >= 2) //at least type name;
{
Member m;
for (size_t i = 0; i < memToks.size(); i++)
{
const auto& t = memToks[i];
if (t.Token == Lexer::tok_subopen)
{
if (i + 1 >= memToks.size())
{
errLine(memToks.back(), "unexpected end after [");
return false;
}
if (memToks[i + 1].Token != Lexer::tok_number)
{
errLine(memToks[i + 1], "expected number token");
return false;
}
m.arrsize = int(memToks[i + 1].NumberVal);
if (i + 2 >= memToks.size())
{
errLine(memToks.back(), "unexpected end, expected ]");
return false;
}
if (memToks[i + 2].Token != Lexer::tok_subclose)
{
errLine(memToks[i + 2], StringUtils::sprintf("expected ], got %s", lexer.TokString(memToks[i + 2]).c_str()));
return false;
}
if (i + 2 != memToks.size() - 1)
{
errLine(memToks[i + 3], "too many tokens");
return false;
}
break;
}
else if (i + 1 == memToks.size() || memToks[i + 1].Token == Lexer::tok_subopen) //last = name
{
m.name = lexer.TokString(memToks[i]);
}
else if (!t.IsType() &&
t.Token != Lexer::tok_op_mul &&
t.Token != Lexer::tok_identifier &&
t.Token != Lexer::tok_void)
{
errLine(t, StringUtils::sprintf("token %s is not a type...", lexer.TokString(t).c_str()));
return false;
}
else
{
if (!m.type.empty() && t.Token != Lexer::tok_op_mul)
m.type.push_back(' ');
m.type += lexer.TokString(t);
}
}
//dprintf("member: %s %s;\n", m.type.c_str(), m.name.c_str());
su.members.push_back(m);
return true;
}
errLine(memToks.back(), "not enough tokens for member");
return false;
};
auto parseStructUnion = [&]()
{
if (isToken(Lexer::tok_struct) || isToken(Lexer::tok_union))
{
StructUnion su;
su.isunion = isToken(Lexer::tok_union);
index++;
if (isTokenList({ Lexer::tok_identifier, Lexer::tok_bropen }))
{
su.name = lexer.TokString(curToken());
index += 2;
while (!isToken(Lexer::tok_brclose))
{
if (isToken(Lexer::tok_eof))
{
errLine(curToken(), StringUtils::sprintf("unexpected eof in %s", su.isunion ? "union" : "struct"));
return false;
}
if (isToken(Lexer::tok_bropen))
{
errLine(curToken(), "nested blocks are not allowed!");
return false;
}
if (!parseMember(su))
return false;
}
index++; //eat tok_brclose
//dprintf("%s %s, members: %d\n", su.isunion ? "union" : "struct", su.name.c_str(), int(su.members.size()));
model.structUnions.push_back(su);
if (!isToken(Lexer::tok_semic))
{
errLine(curToken(), "expected semicolon!");
return false;
}
eatSemic();
return true;
}
else
{
errLine(curToken(), "invalid struct token sequence!");
return false;
}
}
return true;
};
auto parseEnum = [&]()
{
if (isToken(Lexer::tok_enum))
{
Enum e;
index++;
if (isTokenList({ Lexer::tok_identifier, Lexer::tok_bropen }))
{
e.name = lexer.TokString(curToken());
index += 2;
while (!isToken(Lexer::tok_brclose))
{
if (isToken(Lexer::tok_eof))
{
errLine(curToken(), "unexpected eof in enum");
return false;
}
if (isToken(Lexer::tok_bropen))
{
errLine(curToken(), "nested blocks are not allowed!");
return false;
}
if (!e.values.empty())
{
if (isToken(Lexer::tok_comma))
{
index++;
}
else
{
errLine(curToken(), "expected comma in enum");
return false;
}
}
if (!isToken(Lexer::tok_identifier))
{
errLine(curToken(), StringUtils::sprintf("expected identifier in enum, got '%s'", lexer.TokString(curToken()).c_str()));
return false;
}
EnumValue v;
v.name = lexer.TokString(curToken());
v.value = e.values.empty() ? 0 : e.values.back().value + 1;
e.values.push_back(v);
index++;
}
index++; //eat tok_brclose
model.enums.push_back(e);
if (!isToken(Lexer::tok_semic))
{
errLine(curToken(), "expected semicolon!");
return false;
}
eatSemic();
return true;
}
else
{
errLine(curToken(), "invalid enum token sequence!");
return false;
}
__debugbreak();
}
return true;
};
auto parseTypedef = [&]()
{
// TODO: support "typedef struct"
if (isToken(Lexer::tok_typedef))
{
index++;
std::vector<Lexer::TokenState> tdefToks;
while (!isToken(Lexer::tok_semic))
{
if (isToken(Lexer::tok_eof))
{
errLine(curToken(), "unexpected eof in typedef");
return false;
}
tdefToks.push_back(curToken());
index++;
}
if (tdefToks.empty())
{
errLine(curToken(), "unexpected ; in typedef");
return false;
}
eatSemic();
if (tdefToks.size() >= 2) //at least typedef a b;
{
Member tm;
tm.name = lexer.TokString(tdefToks[tdefToks.size() - 1]);
tdefToks.pop_back();
for (auto& t : tdefToks)
if (!t.IsType() &&
t.Token != Lexer::tok_op_mul &&
t.Token != Lexer::tok_identifier &&
t.Token != Lexer::tok_void)
{
errLine(t, StringUtils::sprintf("token %s is not a type...", lexer.TokString(t).c_str()));
return false;
}
else
{
if (!tm.type.empty() && t.Token != Lexer::tok_op_mul)
tm.type.push_back(' ');
tm.type += lexer.TokString(t);
}
//dprintf("typedef %s:%s\n", tm.type.c_str(), tm.name.c_str());
model.types.push_back(tm);
return true;
}
errLine(tdefToks.back(), "not enough tokens for typedef");
return false;
}
return true;
};
while (!isToken(Lexer::tok_eof))
{
auto curIndex = index;
if (!parseTypedef())
return false;
if (!parseStructUnion())
return false;
if (!parseEnum())
return false;
eatSemic();
if (curIndex == index)
{
errLine(curToken(), StringUtils::sprintf("unexpected token %s", lexer.TokString(curToken()).c_str()));
return false;
}
}
LoadModel(owner, model);
return true;
}