From c7ad8a7f7f2f2915d6e6ebac76a67ad85d1697b0 Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Sun, 5 Feb 2023 18:53:45 +0100 Subject: [PATCH] Integrate x64dbg's hacky parser for types --- btparser/btparser.vcxproj | 3 + btparser/btparser.vcxproj.filters | 9 + btparser/helpers.h | 43 +- btparser/lexer.cpp | 30 +- btparser/lexer.h | 2 +- btparser/main.cpp | 13 +- btparser/types.cpp | 717 ++++++++++++++++++++++++++++++ btparser/types.h | 164 +++++++ btparser/typesparser.cpp | 323 ++++++++++++++ 9 files changed, 1285 insertions(+), 19 deletions(-) create mode 100644 btparser/types.cpp create mode 100644 btparser/types.h create mode 100644 btparser/typesparser.cpp diff --git a/btparser/btparser.vcxproj b/btparser/btparser.vcxproj index 4d9a011..d8e3e0e 100644 --- a/btparser/btparser.vcxproj +++ b/btparser/btparser.vcxproj @@ -23,6 +23,8 @@ + + @@ -32,6 +34,7 @@ + diff --git a/btparser/btparser.vcxproj.filters b/btparser/btparser.vcxproj.filters index b902403..0fb66b2 100644 --- a/btparser/btparser.vcxproj.filters +++ b/btparser/btparser.vcxproj.filters @@ -27,6 +27,12 @@ Source Files + + Source Files + + + Source Files + @@ -50,6 +56,9 @@ Header Files + + Header Files + diff --git a/btparser/helpers.h b/btparser/helpers.h index 9c9e81b..593006f 100644 --- a/btparser/helpers.h +++ b/btparser/helpers.h @@ -7,7 +7,7 @@ namespace StringUtils { - static std::string sprintf(const char* format, ...) + inline std::string sprintf(const char* format, ...) 
{ va_list args; va_start(args, format); @@ -27,7 +27,7 @@ namespace StringUtils return std::string(buffer.data()); } - static std::string Escape(const std::string & s) + inline std::string Escape(const std::string & s) { auto escape = [](unsigned char ch) -> std::string { @@ -65,7 +65,7 @@ namespace StringUtils return escaped; } - static std::string Utf16ToUtf8(const std::wstring & wstr) + inline std::string Utf16ToUtf8(const std::wstring & wstr) { std::string convertedString; auto requiredSize = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), -1, nullptr, 0, nullptr, nullptr); @@ -78,7 +78,7 @@ namespace StringUtils return convertedString; } - static std::wstring Utf8ToUtf16(const std::string & str) + inline std::wstring Utf8ToUtf16(const std::string & str) { std::wstring convertedString; int requiredSize = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, nullptr, 0); @@ -90,11 +90,38 @@ namespace StringUtils } return convertedString; } + + inline void Split(const std::string& s, char delim, std::vector& elems) + { + elems.clear(); + std::string item; + item.reserve(s.length()); + for (size_t i = 0; i < s.length(); i++) + { + if (s[i] == delim) + { + if (!item.empty()) + elems.push_back(item); + item.clear(); + } + else + item.push_back(s[i]); + } + if (!item.empty()) + elems.push_back(std::move(item)); + } + + inline std::vector Split(const std::string& s, char delim) + { + std::vector elems; + Split(s, delim, elems); + return elems; + } }; namespace FileHelper { - static bool ReadAllData(const std::string & fileName, std::vector & content) + inline bool ReadAllData(const std::string & fileName, std::vector & content) { auto hFile = CreateFileW(StringUtils::Utf8ToUtf16(fileName).c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, 0, nullptr); auto result = false; @@ -117,7 +144,7 @@ namespace FileHelper return result; } - static bool WriteAllData(const std::string & fileName, const void* data, size_t size) + inline bool WriteAllData(const std::string & 
fileName, const void* data, size_t size) { auto hFile = CreateFileW(StringUtils::Utf8ToUtf16(fileName).c_str(), GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, 0, nullptr); auto result = false; @@ -130,7 +157,7 @@ namespace FileHelper return result; } - static bool ReadAllText(const std::string & fileName, std::string & content) + inline bool ReadAllText(const std::string & fileName, std::string & content) { std::vector data; if(!ReadAllData(fileName, data)) @@ -140,7 +167,7 @@ namespace FileHelper return true; } - static bool WriteAllText(const std::string & fileName, const std::string & content) + inline bool WriteAllText(const std::string & fileName, const std::string & content) { return WriteAllData(fileName, content.c_str(), content.length()); } diff --git a/btparser/lexer.cpp b/btparser/lexer.cpp index 2f06d74..ad8d50a 100644 --- a/btparser/lexer.cpp +++ b/btparser/lexer.cpp @@ -50,7 +50,10 @@ bool Lexer::DoLexing(std::vector & tokens, std::string & error) { while(true) { - auto token = getToken(); + size_t lineIndex = -1; + auto token = getToken(lineIndex); + if (lineIndex == -1) + __debugbreak(); mState.Token = token; if(token == tok_error) { @@ -58,6 +61,8 @@ bool Lexer::DoLexing(std::vector & tokens, std::string & error) return false; } tokens.push_back(mState); + // Restore the line index from when we started parsing the token + tokens.back().LineIndex = lineIndex; mState.Clear(); if(token == tok_eof) break; @@ -76,7 +81,8 @@ bool Lexer::Test(const std::function & lexEnum, char newlineText[128] = ""; do { - tok = getToken(); + size_t lineIndex = -1; + tok = getToken(lineIndex); if(!output) continue; toks.clear(); @@ -97,7 +103,7 @@ bool Lexer::Test(const std::function & lexEnum, return tok != tok_error; } -Lexer::Token Lexer::getToken() +Lexer::Token Lexer::getToken(size_t & tokenLineIndex) { //skip whitespace while(isspace(mLastChar)) @@ -111,12 +117,13 @@ Lexer::Token Lexer::getToken() if(mLastChar == '\\' && (peekChar() == '\r' || peekChar() == '\n')) { 
nextChar(); - return getToken(); + return getToken(tokenLineIndex); } //character literal if(mLastChar == '\'') { + tokenLineIndex = mState.LineIndex - 1; std::string charLit; while(true) { @@ -186,6 +193,7 @@ Lexer::Token Lexer::getToken() //string literal if(mLastChar == '\"') { + tokenLineIndex = mState.LineIndex - 1; mState.StringLit.clear(); while(true) { @@ -252,6 +260,7 @@ Lexer::Token Lexer::getToken() //identifier/keyword if(isalpha(mLastChar) || mLastChar == '_') //[a-zA-Z_] { + tokenLineIndex = mState.LineIndex - 1; mState.IdentifierStr = mLastChar; nextChar(); while(isalnum(mLastChar) || mLastChar == '_') //[0-9a-zA-Z_] @@ -271,6 +280,7 @@ Lexer::Token Lexer::getToken() //hex numbers if(mLastChar == '0' && peekChar() == 'x') //0x { + tokenLineIndex = mState.LineIndex - 1; nextChar(); //consume the 'x' mNumStr.clear(); @@ -286,8 +296,10 @@ Lexer::Token Lexer::getToken() mIsHexNumberVal = true; return tok_number; } + if(isdigit(mLastChar)) //[0-9] { + tokenLineIndex = mState.LineIndex - 1; mNumStr = mLastChar; while(isdigit(nextChar())) //[0-9]* @@ -311,8 +323,9 @@ Lexer::Token Lexer::getToken() } while(!(mLastChar == EOF || mLastChar == '\n')); - return getToken(); //interpret the next line + return getToken(tokenLineIndex); //interpret the next line } + if(mLastChar == '/' && peekChar() == '*') //block comment { do @@ -331,9 +344,11 @@ Lexer::Token Lexer::getToken() nextChar(); nextChar(); - return getToken(); //get the next non-comment token + return getToken(tokenLineIndex); //get the next non-comment token } + tokenLineIndex = mState.LineIndex - 1; + //operators auto opFound = mOpTripleMap.find(MAKE_OP_TRIPLE(mLastChar, peekChar(), peekChar(1))); if(opFound != mOpTripleMap.end()) @@ -359,7 +374,10 @@ Lexer::Token Lexer::getToken() //end of file if(mLastChar == EOF) + { + tokenLineIndex = 0; return tok_eof; + } //unknown character return reportError(StringUtils::sprintf("unexpected character \'%c\'", mLastChar)); diff --git a/btparser/lexer.h 
b/btparser/lexer.h index 6ef4098..9db2507 100644 --- a/btparser/lexer.h +++ b/btparser/lexer.h @@ -93,5 +93,5 @@ private: bool checkString(const std::string & expected); int nextChar(); void signalNewLine(); - Token getToken(); + Token getToken(size_t & tokenLineIndex); }; \ No newline at end of file diff --git a/btparser/main.cpp b/btparser/main.cpp index 091fc9e..b5e189d 100644 --- a/btparser/main.cpp +++ b/btparser/main.cpp @@ -5,6 +5,7 @@ #include "parser.h" #include "helpers.h" #include "preprocessor.h" +#include "types.h" bool TestLexer(Lexer & lexer, const std::string & filename) { @@ -106,13 +107,17 @@ bool DebugParser(const std::string & filename) return false; } - Parser parser; - std::string error; - if(!parser.ParseString(ppData, error)) + FileHelper::WriteAllText("tests\\" + filename + ".pp.h", ppData); + + std::vector errors; + if (!ParseTypes(ppData, filename, errors)) { - printf("ParseFile failed: %s\n", error.c_str()); + puts("Failed to parse types:"); + for (const auto& error : errors) + puts(error.c_str()); return false; } + puts("ParseFile success!"); return true; } diff --git a/btparser/types.cpp b/btparser/types.cpp new file mode 100644 index 0000000..a327626 --- /dev/null +++ b/btparser/types.cpp @@ -0,0 +1,717 @@ +#include "types.h" +#include "helpers.h" +#include + +using namespace Types; + +static TypeManager typeManager; + +#define EXCLUSIVE_ACQUIRE(x) +#define SHARED_ACQUIRE(x) + +TypeManager::TypeManager() +{ + auto p = [this](const std::string & n, Primitive p, int size) + { + primitivesizes[p] = size; + auto splits = StringUtils::Split(n, ','); + for(const auto & split : splits) + addType("", p, split); + }; + p("int8_t,int8,char,byte,bool,signed char", Int8, sizeof(char)); + p("uint8_t,uint8,uchar,unsigned char,ubyte", Uint8, sizeof(unsigned char)); + p("int16_t,int16,wchar_t,char16_t,short", Int16, sizeof(short)); + p("uint16_t,uint16,ushort,unsigned short", Uint16, sizeof(unsigned short)); //unsigned 16-bit aliases are registered as Uint16 (this also records primitivesizes[Uint16]) + p("int32_t,int32,int,long", Int32, 
sizeof(int)); + p("uint32_t,uint32,unsigned int,unsigned long", Uint32, sizeof(unsigned int)); + p("int64_t,int64,long long", Int64, sizeof(long long)); + p("uint64_t,uint64,unsigned long long", Uint64, sizeof(unsigned long long)); + p("dsint", Dsint, sizeof(void*)); + p("duint,size_t", Duint, sizeof(void*)); + p("float", Float, sizeof(float)); + p("double", Double, sizeof(double)); + p("ptr,void*", Pointer, sizeof(void*)); + p("char*,const char*", PtrString, sizeof(char*)); + p("wchar_t*,const wchar_t*", PtrWString, sizeof(wchar_t*)); +} + +bool TypeManager::AddType(const std::string & owner, const std::string & type, const std::string & name) +{ + if(owner.empty()) + return false; + validPtr(type); + auto found = types.find(type); + if(found == types.end()) + return false; + return addType(owner, found->second.primitive, name); +} + +bool TypeManager::AddStruct(const std::string & owner, const std::string & name) +{ + StructUnion s; + s.name = name; + s.owner = owner; + return addStructUnion(s); +} + +bool TypeManager::AddUnion(const std::string & owner, const std::string & name) +{ + StructUnion u; + u.owner = owner; + u.name = name; + u.isunion = true; + return addStructUnion(u); +} + +bool TypeManager::AddMember(const std::string & parent, const std::string & type, const std::string & name, int arrsize, int offset) +{ + if(!isDefined(type) && !validPtr(type)) + return false; + auto found = structs.find(parent); + if(arrsize < 0 || found == structs.end() || !isDefined(type) || name.empty() || type.empty() || type == parent) + return false; + auto & s = found->second; + + for(const auto & member : s.members) + if(member.name == name) + return false; + + auto typeSize = Sizeof(type); + if(arrsize) + typeSize *= arrsize; + + Member m; + m.name = name; + m.arrsize = arrsize; + m.type = type; + m.offset = offset; + + if(offset >= 0) //user-defined offset + { + if(offset < s.size) + return false; + if(offset > s.size) + { + Member pad; + pad.type = "char"; + 
pad.arrsize = offset - s.size; + char padname[32] = ""; + sprintf_s(padname, "padding%d", pad.arrsize); + pad.name = padname; + s.members.push_back(pad); + s.size += pad.arrsize; + } + } + + s.members.push_back(m); + + if(s.isunion) + { + if(typeSize > s.size) + s.size = typeSize; + } + else + { + s.size += typeSize; + } + return true; +} + +bool TypeManager::AppendMember(const std::string & type, const std::string & name, int arrsize, int offset) +{ + return AddMember(laststruct, type, name, arrsize, offset); +} + +bool TypeManager::AddFunction(const std::string & owner, const std::string & name, const std::string & rettype, CallingConvention callconv, bool noreturn) +{ + auto found = functions.find(name); + if(found != functions.end() || name.empty() || owner.empty()) + return false; + lastfunction = name; + Function f; + f.owner = owner; + f.name = name; + if(rettype != "void" && !isDefined(rettype) && !validPtr(rettype)) + return false; + f.rettype = rettype; + f.callconv = callconv; + f.noreturn = noreturn; + functions.insert({f.name, f}); + return true; +} + +bool TypeManager::AddArg(const std::string & function, const std::string & type, const std::string & name) +{ + if(!isDefined(type) && !validPtr(type)) + return false; + auto found = functions.find(function); + if(found == functions.end() || function.empty() || name.empty() || !isDefined(type)) + return false; + lastfunction = function; + Member arg; + arg.name = name; + arg.type = type; + found->second.args.push_back(arg); + return true; +} + +bool TypeManager::AppendArg(const std::string & type, const std::string & name) +{ + return AddArg(lastfunction, type, name); +} + +int TypeManager::Sizeof(const std::string & type) const +{ + auto foundT = types.find(type); + if(foundT != types.end()) + return foundT->second.size; + auto foundS = structs.find(type); + if(foundS != structs.end()) + return foundS->second.size; + auto foundF = functions.find(type); + if(foundF != functions.end()) + { + const auto 
foundP = primitivesizes.find(Pointer); + if(foundP != primitivesizes.end()) + return foundP->second; + return sizeof(void*); + } + return 0; +} + +bool TypeManager::Visit(const std::string & type, const std::string & name, Visitor & visitor) const +{ + Member m; + m.name = name; + m.type = type; + return visitMember(m, visitor); +} + +template +static void filterOwnerMap(std::unordered_map & map, const std::string & owner) +{ + for(auto i = map.begin(); i != map.end();) + { + auto j = i++; + if(j->second.owner.empty()) + continue; + if(owner.empty() || j->second.owner == owner) + map.erase(j); + } +} + +void TypeManager::Clear(const std::string & owner) +{ + laststruct.clear(); + lastfunction.clear(); + filterOwnerMap(types, owner); + filterOwnerMap(structs, owner); + filterOwnerMap(functions, owner); +} + +template +static bool removeType(std::unordered_map & map, const std::string & type) +{ + auto found = map.find(type); + if(found == map.end()) + return false; + if(found->second.owner.empty()) + return false; + map.erase(found); + return true; +} + +bool TypeManager::RemoveType(const std::string & type) +{ + return removeType(types, type) || removeType(structs, type) || removeType(functions, type); +} + +static std::string getKind(const StructUnion & su) +{ + return su.isunion ? 
"union" : "struct"; +} + +static std::string getKind(const Type & t) +{ + return "typedef"; +} + +static std::string getKind(const Function & f) +{ + return "function"; +} + +template +static void enumType(const std::unordered_map & map, std::vector & types) +{ + for(auto i = map.begin(); i != map.end(); ++i) + { + TypeManager::Summary s; + s.kind = getKind(i->second); + s.name = i->second.name; + s.owner = i->second.owner; + s.size = SizeofType(s.name); + types.push_back(s); + } +} + +void TypeManager::Enum(std::vector & typeList) const +{ + typeList.clear(); + enumType(types, typeList); + enumType(structs, typeList); + enumType(functions, typeList); + //nasty hacks to sort in a nice way + std::sort(typeList.begin(), typeList.end(), [](const Summary & a, const Summary & b) + { + auto kindInt = [](const std::string & kind) + { + if(kind == "typedef") + return 0; + if(kind == "struct") + return 1; + if(kind == "union") + return 2; + if(kind == "function") + return 3; + __debugbreak(); + return 4; + }; + if(a.owner < b.owner) + return true; + else if(a.owner > b.owner) + return false; + auto ka = kindInt(a.kind), kb = kindInt(b.kind); + if(ka < kb) + return true; + else if(ka > kb) + return false; + if(a.name < b.name) + return true; + else if(a.name > b.name) + return false; + return a.size < b.size; + }); +} + +std::string Types::TypeManager::StructUnionPtrType(const std::string & pointto) const +{ + auto itr = structs.find(pointto); + if(itr == structs.end()) + return ""; + return getKind(itr->second); +} + +template +static bool mapContains(const std::unordered_map & map, const K & k) +{ + return map.find(k) != map.end(); +} + +bool TypeManager::isDefined(const std::string & id) const +{ + return mapContains(types, id) || mapContains(structs, id); +} + +bool TypeManager::validPtr(const std::string & id) +{ + if(!id.empty() && id.back() == '*') //an empty id is never a pointer type; indexing it at length() - 1 would be out of range + { + auto type = id.substr(0, id.length() - 1); + if(!isDefined(type) && !validPtr(type)) + return false; + std::string 
owner("ptr"); + auto foundT = types.find(type); + if(foundT != types.end()) + owner = foundT->second.owner; + auto foundS = structs.find(type); + if(foundS != structs.end()) + owner = foundS->second.owner; + return addType(owner, Pointer, id, type); + } + return false; +} + +bool TypeManager::addStructUnion(const StructUnion & s) +{ + laststruct = s.name; + if(s.owner.empty() || s.name.empty() || isDefined(s.name)) + return false; + structs.insert({s.name, s}); + return true; +} + +bool TypeManager::addType(const Type & t) +{ + if(t.name.empty() || isDefined(t.name)) + return false; + types.insert({t.name, t}); + return true; +} + +bool TypeManager::addType(const std::string & owner, Primitive primitive, const std::string & name, const std::string & pointto) +{ + if(name.empty() || isDefined(name)) + return false; + Type t; + t.owner = owner; + t.name = name; + t.primitive = primitive; + t.size = primitivesizes[primitive]; + t.pointto = pointto; + return addType(t); +} + +bool TypeManager::visitMember(const Member & root, Visitor & visitor) const +{ + auto foundT = types.find(root.type); + if(foundT != types.end()) + { + const auto & t = foundT->second; + if(!t.pointto.empty()) + { + if(!isDefined(t.pointto)) + return false; + if(visitor.visitPtr(root, t)) //allow the visitor to bail out + { + if(!Visit(t.pointto, "*" + root.name, visitor)) + return false; + return visitor.visitBack(root); + } + return true; + } + return visitor.visitType(root, t); + } + auto foundS = structs.find(root.type); + if(foundS != structs.end()) + { + const auto & s = foundS->second; + if(!visitor.visitStructUnion(root, s)) + return false; + for(const auto & child : s.members) + { + if(child.arrsize) + { + if(!visitor.visitArray(child)) + return false; + for(auto i = 0; i < child.arrsize; i++) + if(!visitMember(child, visitor)) + return false; + if(!visitor.visitBack(child)) + return false; + } + else if(!visitMember(child, visitor)) + return false; + } + return visitor.visitBack(root); + 
} + return false; +} + +bool AddType(const std::string & owner, const std::string & type, const std::string & name) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + return typeManager.AddType(owner, type, name); +} + +bool AddStruct(const std::string & owner, const std::string & name) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + return typeManager.AddStruct(owner, name); +} + +bool AddUnion(const std::string & owner, const std::string & name) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + return typeManager.AddUnion(owner, name); +} + +bool AddMember(const std::string & parent, const std::string & type, const std::string & name, int arrsize, int offset) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + return typeManager.AddMember(parent, type, name, arrsize, offset); +} + +bool AppendMember(const std::string & type, const std::string & name, int arrsize, int offset) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + return typeManager.AppendMember(type, name, arrsize, offset); +} + +bool AddFunction(const std::string & owner, const std::string & name, const std::string & rettype, Types::CallingConvention callconv, bool noreturn) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + return typeManager.AddFunction(owner, name, rettype, callconv, noreturn); +} + +bool AddArg(const std::string & function, const std::string & type, const std::string & name) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + return typeManager.AddArg(function, type, name); +} + +bool AppendArg(const std::string & type, const std::string & name) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + return typeManager.AppendArg(type, name); +} + +int SizeofType(const std::string & type) +{ + SHARED_ACQUIRE(LockTypeManager); + return typeManager.Sizeof(type); +} + +bool VisitType(const std::string & type, const std::string & name, Types::TypeManager::Visitor & visitor) +{ + SHARED_ACQUIRE(LockTypeManager); + return typeManager.Visit(type, name, visitor); +} + +void ClearTypes(const std::string & owner) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + 
return typeManager.Clear(owner); +} + +bool RemoveType(const std::string & type) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + return typeManager.RemoveType(type); +} + +void EnumTypes(std::vector & typeList) +{ + SHARED_ACQUIRE(LockTypeManager); + return typeManager.Enum(typeList); +} + +#if 0 +int json_default_int(const JSON object, const char* key, int defaultVal) +{ + auto jint = json_object_get(object, key); + if(jint && json_is_integer(jint)) + return int(json_integer_value(jint)); + return defaultVal; +} + +static void loadTypes(const JSON troot, std::vector & types) +{ + if(!troot) + return; + size_t i; + JSON vali; + Member curType; + json_array_foreach(troot, i, vali) + { + auto type = json_string_value(json_object_get(vali, "type")); + auto name = json_string_value(json_object_get(vali, "name")); + if(!type || !*type || !name || !*name) + continue; + curType.type = type; + curType.name = name; + types.push_back(curType); + } +} + +static void loadStructUnions(const JSON suroot, bool isunion, std::vector & structUnions) +{ + if(!suroot) + return; + size_t i; + JSON vali; + StructUnion curSu; + curSu.isunion = isunion; + json_array_foreach(suroot, i, vali) + { + auto suname = json_string_value(json_object_get(vali, "name")); + if(!suname || !*suname) + continue; + curSu.name = suname; + curSu.members.clear(); + auto members = json_object_get(vali, "members"); + size_t j; + JSON valj; + Member curMember; + json_array_foreach(members, j, valj) + { + auto type = json_string_value(json_object_get(valj, "type")); + auto name = json_string_value(json_object_get(valj, "name")); + if(!type || !*type || !name || !*name) + continue; + curMember.type = type; + curMember.name = name; + curMember.arrsize = json_default_int(valj, "arrsize", 0); + curMember.offset = json_default_int(valj, "offset", -1); + curSu.members.push_back(curMember); + } + structUnions.push_back(curSu); + } +} + +static void loadFunctions(const JSON froot, std::vector & functions) +{ + if(!froot) + 
return; + size_t i; + JSON vali; + Function curFunction; + json_array_foreach(froot, i, vali) + { + auto rettype = json_string_value(json_object_get(vali, "rettype")); + auto fname = json_string_value(json_object_get(vali, "name")); + if(!rettype || !*rettype || !fname || !*fname) + continue; + curFunction.rettype = rettype; + curFunction.name = fname; + curFunction.args.clear(); + auto callconv = json_string_value(json_object_get(vali, "callconv")); + curFunction.noreturn = json_boolean_value(json_object_get(vali, "noreturn")); + if(scmp(callconv, "cdecl")) + curFunction.callconv = Cdecl; + else if(scmp(callconv, "stdcall")) + curFunction.callconv = Stdcall; + else if(scmp(callconv, "thiscall")) + curFunction.callconv = Thiscall; + else if(scmp(callconv, "delphi")) + curFunction.callconv = Delphi; + else + curFunction.callconv = Cdecl; + auto args = json_object_get(vali, "args"); + size_t j; + JSON valj; + Member curArg; + json_array_foreach(args, j, valj) + { + auto type = json_string_value(json_object_get(valj, "type")); + auto name = json_string_value(json_object_get(valj, "name")); + if(!type || !*type || !name || !*name) + continue; + curArg.type = type; + curArg.name = name; + curFunction.args.push_back(curArg); + } + functions.push_back(curFunction); + } +} +#endif + +#define dprintf printf +#define QT_TRANSLATE_NOOP(ctx, s) s + +void LoadModel(const std::string & owner, Model & model) +{ + //Add all base struct/union types first to avoid errors later + for(auto & su : model.structUnions) + { + auto success = su.isunion ? typeManager.AddUnion(owner, su.name) : typeManager.AddStruct(owner, su.name); + if(!success) + { + //TODO properly handle errors + dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add %s %s;\n"), su.isunion ? 
"union" : "struct", su.name.c_str()); + su.name.clear(); //signal error + } + } + + //Add simple typedefs + for(auto & type : model.types) + { + auto success = typeManager.AddType(owner, type.type, type.name); + if(!success) + { + //TODO properly handle errors + dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add typedef %s %s;\n"), type.type.c_str(), type.name.c_str()); + } + } + + //Add enums + for (auto& enumm : model.enums) + { + __debugbreak(); + } + + //Add base function types to avoid errors later + for(auto & function : model.functions) + { + auto success = typeManager.AddFunction(owner, function.name, function.rettype, function.callconv, function.noreturn); + if(!success) + { + //TODO properly handle errors + dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add function %s %s()\n"), function.rettype.c_str(), function.name.c_str()); + function.name.clear(); //signal error + } + } + + //Add struct/union members + for(auto & su : model.structUnions) + { + if(su.name.empty()) //skip error-signalled structs/unions + continue; + for(auto & member : su.members) + { + auto success = typeManager.AddMember(su.name, member.type, member.name, member.arrsize, member.offset); + if(!success) + { + //TODO properly handle errors + dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add member %s %s.%s;\n"), member.type.c_str(), su.name.c_str(), member.name.c_str()); + } + } + } + + //Add function arguments + for(auto & function : model.functions) + { + if(function.name.empty()) //skip error-signalled functions + continue; + for(auto & arg : function.args) + { + auto success = typeManager.AddArg(function.name, arg.type, arg.name); + if(!success) + { + //TODO properly handle errors + dprintf(QT_TRANSLATE_NOOP("DBG", "Failed to add argument %s %s.%s;\n"), arg.type.c_str(), function.name.c_str(), arg.name.c_str()); + } + } + } +} + +#if 0 +bool LoadTypesJson(const std::string & json, const std::string & owner) +{ + EXCLUSIVE_ACQUIRE(LockTypeManager); + auto root = json_loads(json.c_str(), 0, 
0); + if(root) + { + Model model; + loadTypes(json_object_get(root, "types"), model.types); + loadTypes(json_object_get(root, ArchValue("types32", "types64")), model.types); + loadStructUnions(json_object_get(root, "structs"), false, model.structUnions); + loadStructUnions(json_object_get(root, ArchValue("structs32", "structs64")), false, model.structUnions); + loadStructUnions(json_object_get(root, "unions"), true, model.structUnions); + loadStructUnions(json_object_get(root, ArchValue("unions32", "unions64")), true, model.structUnions); + loadFunctions(json_object_get(root, "functions"), model.functions); + loadFunctions(json_object_get(root, ArchValue("functions32", "functions64")), model.functions); + + LoadModel(owner, model); + + // Free root + json_decref(root); + } + else + return false; + return true; +} + +bool LoadTypesFile(const std::string & path, const std::string & owner) +{ + std::string json; + if(!FileHelper::ReadAllText(path, json)) + return false; + return LoadTypesJson(json, owner); +} +#endif + +std::string StructUnionPtrType(const std::string & pointto) +{ + return typeManager.StructUnionPtrType(pointto); +} diff --git a/btparser/types.h b/btparser/types.h new file mode 100644 index 0000000..7597712 --- /dev/null +++ b/btparser/types.h @@ -0,0 +1,164 @@ +#pragma once + +#include +#include +#include + +namespace Types +{ + enum Primitive + { + Void, + Int8, + Uint8, + Int16, + Uint16, + Int32, + Uint32, + Int64, + Uint64, + Dsint, + Duint, + Float, + Double, + Pointer, + PtrString, //char* (null-terminated) + PtrWString //wchar_t* (null-terminated) + }; + + struct Type + { + std::string owner; //Type owner + std::string name; //Type identifier. + std::string pointto; //Type identifier of *Type + Primitive primitive = Void; //Primitive type. + int size = 0; //Size in bytes. 
+    };
+
+    struct Member
+    {
+        std::string name; //Member identifier
+        std::string type; //Type.name
+        int arrsize = 0; //Number of elements if Member is an array
+        int offset = -1; //Member offset (only stored for reference)
+    };
+
+    struct StructUnion
+    {
+        std::string owner; //StructUnion owner
+        std::string name; //StructUnion identifier
+        std::vector<Member> members; //StructUnion members
+        bool isunion = false; //Is this a union?
+        int size = 0;
+    };
+
+    struct EnumValue
+    {
+        std::string name; //EnumValue identifier
+        uint64_t value = 0; //EnumValue numeric value
+    };
+
+    struct Enum
+    {
+        std::string owner; // Enum owner
+        std::string name; // Enum name
+        std::vector<EnumValue> values; // Enum values
+        Type type; // Enum value type
+    };
+
+    enum CallingConvention
+    {
+        Cdecl,
+        Stdcall,
+        Thiscall,
+        Delphi
+    };
+
+    struct Function
+    {
+        std::string owner; //Function owner
+        std::string name; //Function identifier
+        std::string rettype; //Function return type
+        CallingConvention callconv = Cdecl; //Function calling convention
+        bool noreturn = false; //Function does not return (ExitProcess, _exit)
+        std::vector<Member> args; //Function arguments
+    };
+
+    struct TypeManager
+    {
+        struct Visitor
+        {
+            virtual ~Visitor() { }
+            virtual bool visitType(const Member & member, const Type & type) = 0;
+            virtual bool visitStructUnion(const Member & member, const StructUnion & type) = 0;
+            virtual bool visitArray(const Member & member) = 0;
+            virtual bool visitPtr(const Member & member, const Type & type) = 0;
+            virtual bool visitBack(const Member & member) = 0;
+        };
+
+        struct Summary
+        {
+            std::string kind;
+            std::string name;
+            std::string owner;
+            int size = 0;
+        };
+
+        explicit TypeManager();
+        bool AddType(const std::string & owner, const std::string & type, const std::string & name);
+        bool AddStruct(const std::string & owner, const std::string & name);
+        bool AddUnion(const std::string & owner, const std::string & name);
+        bool AddMember(const std::string & parent, const std::string & type, const std::string & name, int arrsize = 0, int offset = -1);
+        bool AppendMember(const std::string & type, const std::string & name, int arrsize = 0, int offset = -1);
+        bool AddFunction(const std::string & owner, const std::string & name, const std::string & rettype, CallingConvention callconv = Cdecl, bool noreturn = false);
+        bool AddArg(const std::string & function, const std::string & type, const std::string & name);
+        bool AppendArg(const std::string & type, const std::string & name);
+        int Sizeof(const std::string & type) const;
+        bool Visit(const std::string & type, const std::string & name, Visitor & visitor) const;
+        void Clear(const std::string & owner = "");
+        bool RemoveType(const std::string & type);
+        void Enum(std::vector<Summary> & typeList) const;
+        std::string StructUnionPtrType(const std::string & pointto) const;
+
+    private:
+        std::unordered_map<Primitive, int> primitivesizes;
+        std::unordered_map<std::string, Type> types;
+        std::unordered_map<std::string, StructUnion> structs;
+        std::unordered_map<std::string, Function> functions;
+        std::string laststruct;
+        std::string lastfunction;
+
+        bool isDefined(const std::string & id) const;
+        bool validPtr(const std::string & id);
+        bool addStructUnion(const StructUnion & s);
+        bool addType(const std::string & owner, Primitive primitive, const std::string & name, const std::string & pointto = "");
+        bool addType(const Type & t);
+        bool visitMember(const Member & root, Visitor & visitor) const;
+    };
+
+    struct Model
+    {
+        std::vector<Member> types; //Typedefs (name = alias, type = aliased type)
+        std::vector<Enum> enums; //Enums
+        std::vector<StructUnion> structUnions; //Structs and unions
+        std::vector<Function> functions; //Functions
+    };
+};
+
+bool AddType(const std::string & owner, const std::string & type, const std::string & name);
+bool AddStruct(const std::string & owner, const std::string & name);
+bool AddUnion(const std::string & owner, const std::string & name);
+bool AddMember(const std::string & parent, const std::string & type, const std::string & name, int arrsize = 0, int offset = -1);
+bool AppendMember(const std::string & type, const std::string & name, int arrsize = 0, int offset = -1);
+bool AddFunction(const std::string & owner, const std::string & name, const std::string & rettype, Types::CallingConvention callconv = Types::Cdecl, bool noreturn = false);
+bool AddArg(const std::string & function, const std::string & type, const std::string & name);
+bool AppendArg(const std::string & type, const std::string & name);
+int SizeofType(const std::string & type);
+bool VisitType(const std::string & type, const std::string & name, Types::TypeManager::Visitor & visitor);
+void ClearTypes(const std::string & owner = "");
+bool RemoveType(const std::string & type);
+void EnumTypes(std::vector<Types::TypeManager::Summary> & typeList);
+bool LoadTypesJson(const std::string & json, const std::string & owner);
+bool LoadTypesFile(const std::string & path, const std::string & owner);
+bool ParseTypes(const std::string & parse, const std::string & owner, std::vector<std::string> & errors);
+std::string StructUnionPtrType(const std::string & pointto);
\ No newline at end of file
diff --git a/btparser/typesparser.cpp b/btparser/typesparser.cpp
new file mode 100644
index 0000000..ee91aa7
--- /dev/null
+++ b/btparser/typesparser.cpp
@@ -0,0 +1,335 @@
+#include "types.h"
+#include "helpers.h"
+
+using namespace Types;
+
+#include "lexer.h"
+
+void LoadModel(const std::string& owner, Model& model);
+
+//Parses C-like typedef, struct, union and enum declarations from the text in parse,
+//collects them in a Model and hands that model to LoadModel together with owner.
+//Returns false when lexing or parsing fails; a description of the failure is appended to errors.
+//NOTE(review): assumes the lexer always ends the token list with tok_eof - confirm in lexer.cpp.
+bool ParseTypes(const std::string& parse, const std::string& owner, std::vector<std::string>& errors)
+{
+    Lexer lexer;
+    lexer.SetInputData(parse);
+    std::vector<Lexer::TokenState> tokens;
+    size_t index = 0;
+    //Returns token i; lookahead past the end is clamped to the last token instead of reading out of bounds
+    auto getToken = [&](size_t i) -> Lexer::TokenState&
+    {
+        if (i >= tokens.size())
+            i = tokens.size() - 1;
+        return tokens[i];
+    };
+    auto curToken = [&]() -> Lexer::TokenState&
+    {
+        return getToken(index);
+    };
+    auto isToken = [&](Lexer::Token token)
+    {
+        return getToken(index).Token == token;
+    };
+    //Checks whether the tokens starting at the current index match the sequence il
+    auto isTokenList = [&](std::initializer_list<Lexer::Token> il)
+    {
+        size_t i = 0;
+        for (auto l : il)
+            if (getToken(index + i++).Token != l)
+                return false;
+        return true;
+    };
+    std::string error;
+    if (!lexer.DoLexing(tokens, error))
+    {
+        errors.push_back(error);
+        return false;
+    }
+    Model model;
+
+    //Appends message to errors, prefixed with the position stored in token
+    auto errLine = [&](const Lexer::TokenState& token, const std::string& message)
+    {
+        errors.push_back(StringUtils::sprintf("[line %zu:%zu] %s", token.CurLine + 1, token.LineIndex, message.c_str()));
+    };
+    //Skips any number of consecutive semicolons
+    auto eatSemic = [&]()
+    {
+        while (curToken().Token == Lexer::tok_semic)
+            index++;
+    };
+    //Parses one "type name;" or "type name[count];" member and appends it to su.members
+    auto parseMember = [&](StructUnion& su)
+    {
+        std::vector<Lexer::TokenState> memToks;
+        while (!isToken(Lexer::tok_semic))
+        {
+            if (isToken(Lexer::tok_eof))
+            {
+                errLine(curToken(), "unexpected eof in member");
+                return false;
+            }
+            memToks.push_back(curToken());
+            index++;
+        }
+        if (memToks.empty())
+        {
+            errLine(curToken(), "unexpected ; in member");
+            return false;
+        }
+        eatSemic();
+        if (memToks.size() >= 2) //at least type name;
+        {
+            Member m;
+            for (size_t i = 0; i < memToks.size(); i++)
+            {
+                const auto& t = memToks[i];
+                if (t.Token == Lexer::tok_subopen)
+                {
+                    if (i + 1 >= memToks.size())
+                    {
+                        errLine(memToks.back(), "unexpected end after [");
+                        return false;
+                    }
+                    if (memToks[i + 1].Token != Lexer::tok_number)
+                    {
+                        errLine(memToks[i + 1], "expected number token");
+                        return false;
+                    }
+                    m.arrsize = int(memToks[i + 1].NumberVal);
+                    if (i + 2 >= memToks.size())
+                    {
+                        errLine(memToks.back(), "unexpected end, expected ]");
+                        return false;
+                    }
+                    if (memToks[i + 2].Token != Lexer::tok_subclose)
+                    {
+                        errLine(memToks[i + 2], StringUtils::sprintf("expected ], got %s", lexer.TokString(memToks[i + 2]).c_str()));
+                        return false;
+                    }
+                    if (i + 2 != memToks.size() - 1)
+                    {
+                        errLine(memToks[i + 3], "too many tokens");
+                        return false;
+                    }
+                    break;
+                }
+                else if (i + 1 == memToks.size() || memToks[i + 1].Token == Lexer::tok_subopen) //last = name
+                {
+                    m.name = lexer.TokString(memToks[i]);
+                }
+                else if (!t.IsType() &&
+                         t.Token != Lexer::tok_op_mul &&
+                         t.Token != Lexer::tok_identifier &&
+                         t.Token != Lexer::tok_void)
+                {
+                    errLine(t, StringUtils::sprintf("token %s is not a type...", lexer.TokString(t).c_str()));
+                    return false;
+                }
+                else
+                {
+                    if (!m.type.empty() && t.Token != Lexer::tok_op_mul)
+                        m.type.push_back(' ');
+                    m.type += lexer.TokString(t);
+                }
+            }
+            //dprintf("member: %s %s;\n", m.type.c_str(), m.name.c_str());
+            su.members.push_back(m);
+            return true;
+        }
+        errLine(memToks.back(), "not enough tokens for member");
+        return false;
+    };
+    //Parses "struct name { members };" or "union name { members };", does nothing on any other token
+    auto parseStructUnion = [&]()
+    {
+        if (isToken(Lexer::tok_struct) || isToken(Lexer::tok_union))
+        {
+            StructUnion su;
+            su.isunion = isToken(Lexer::tok_union);
+            index++;
+            if (isTokenList({ Lexer::tok_identifier, Lexer::tok_bropen }))
+            {
+                su.name = lexer.TokString(curToken());
+                index += 2;
+                while (!isToken(Lexer::tok_brclose))
+                {
+                    if (isToken(Lexer::tok_eof))
+                    {
+                        errLine(curToken(), StringUtils::sprintf("unexpected eof in %s", su.isunion ? "union" : "struct"));
+                        return false;
+                    }
+                    if (isToken(Lexer::tok_bropen))
+                    {
+                        errLine(curToken(), "nested blocks are not allowed!");
+                        return false;
+                    }
+                    if (!parseMember(su))
+                        return false;
+                }
+                index++; //eat tok_brclose
+                //dprintf("%s %s, members: %d\n", su.isunion ? "union" : "struct", su.name.c_str(), int(su.members.size()));
+                model.structUnions.push_back(su);
+                if (!isToken(Lexer::tok_semic))
+                {
+                    errLine(curToken(), "expected semicolon!");
+                    return false;
+                }
+                eatSemic();
+                return true;
+            }
+            else
+            {
+                errLine(curToken(), "invalid struct token sequence!");
+                return false;
+            }
+        }
+        return true;
+    };
+    //Parses "enum name { a, b, c };" numbering the values sequentially from 0, does nothing on any other token
+    auto parseEnum = [&]()
+    {
+        if (isToken(Lexer::tok_enum))
+        {
+            Enum e;
+            index++;
+            if (isTokenList({ Lexer::tok_identifier, Lexer::tok_bropen }))
+            {
+                e.name = lexer.TokString(curToken());
+                index += 2;
+                while (!isToken(Lexer::tok_brclose))
+                {
+                    if (isToken(Lexer::tok_eof))
+                    {
+                        errLine(curToken(), "unexpected eof in enum");
+                        return false;
+                    }
+                    if (isToken(Lexer::tok_bropen))
+                    {
+                        errLine(curToken(), "nested blocks are not allowed!");
+                        return false;
+                    }
+
+                    if (!e.values.empty())
+                    {
+                        if (isToken(Lexer::tok_comma))
+                        {
+                            index++;
+                        }
+                        else
+                        {
+                            errLine(curToken(), "expected comma in enum");
+                            return false;
+                        }
+                    }
+
+                    if (!isToken(Lexer::tok_identifier))
+                    {
+                        errLine(curToken(), StringUtils::sprintf("expected identifier in enum, got '%s'", lexer.TokString(curToken()).c_str()));
+                        return false;
+                    }
+
+                    EnumValue v;
+                    v.name = lexer.TokString(curToken());
+                    v.value = e.values.empty() ? 0 : e.values.back().value + 1;
+                    e.values.push_back(v);
+
+                    index++;
+                }
+                index++; //eat tok_brclose
+
+                model.enums.push_back(e);
+                if (!isToken(Lexer::tok_semic))
+                {
+                    errLine(curToken(), "expected semicolon!");
+                    return false;
+                }
+                eatSemic();
+                return true;
+            }
+            else
+            {
+                errLine(curToken(), "invalid enum token sequence!");
+                return false;
+            }
+        }
+        return true;
+    };
+    //Parses "typedef <type tokens> name;", does nothing on any other token
+    auto parseTypedef = [&]()
+    {
+        // TODO: support "typedef struct"
+        if (isToken(Lexer::tok_typedef))
+        {
+            index++;
+            std::vector<Lexer::TokenState> tdefToks;
+            while (!isToken(Lexer::tok_semic))
+            {
+                if (isToken(Lexer::tok_eof))
+                {
+                    errLine(curToken(), "unexpected eof in typedef");
+                    return false;
+                }
+                tdefToks.push_back(curToken());
+                index++;
+            }
+            if (tdefToks.empty())
+            {
+                errLine(curToken(), "unexpected ; in typedef");
+                return false;
+            }
+            eatSemic();
+            if (tdefToks.size() >= 2) //at least typedef a b;
+            {
+                Member tm;
+                tm.name = lexer.TokString(tdefToks[tdefToks.size() - 1]);
+                tdefToks.pop_back();
+                for (auto& t : tdefToks)
+                    if (!t.IsType() &&
+                        t.Token != Lexer::tok_op_mul &&
+                        t.Token != Lexer::tok_identifier &&
+                        t.Token != Lexer::tok_void)
+                    {
+                        errLine(t, StringUtils::sprintf("token %s is not a type...", lexer.TokString(t).c_str()));
+                        return false;
+                    }
+                    else
+                    {
+                        if (!tm.type.empty() && t.Token != Lexer::tok_op_mul)
+                            tm.type.push_back(' ');
+                        tm.type += lexer.TokString(t);
+                    }
+                //dprintf("typedef %s:%s\n", tm.type.c_str(), tm.name.c_str());
+                model.types.push_back(tm);
+                return true;
+            }
+            errLine(tdefToks.back(), "not enough tokens for typedef");
+            return false;
+        }
+        return true;
+    };
+
+    //An empty token list is treated like empty input so getToken never indexes an empty vector
+    while (!tokens.empty() && !isToken(Lexer::tok_eof))
+    {
+        auto curIndex = index;
+        if (!parseTypedef())
+            return false;
+        if (!parseStructUnion())
+            return false;
+        if (!parseEnum())
+            return false;
+        eatSemic();
+        if (curIndex == index)
+        {
+            errLine(curToken(), StringUtils::sprintf("unexpected token %s", lexer.TokString(curToken()).c_str()));
+            return false;
+        }
+    }
+
+    LoadModel(owner, model);
+
+    return true;
+}
\ No newline at end of file