mirror of https://github.com/x64dbg/btparser
basics of AST and parsing
This commit is contained in:
parent
d96b60b6b5
commit
0f4eb113a9
|
@ -0,0 +1,75 @@
|
|||
#pragma once
|
||||
|
||||
#include "lexer.h"
|
||||
#include <memory>
|
||||
|
||||
namespace AST
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
template<class T>
|
||||
using uptr = unique_ptr<T>;
|
||||
|
||||
template <class T, class... Args>
|
||||
static typename enable_if<!is_array<T>::value, unique_ptr<T>>::type make_uptr(Args &&... args)
|
||||
{
|
||||
return uptr<T>(new T(std::forward<Args>(args)...));
|
||||
}
|
||||
|
||||
using Operator = Lexer::Token; //TODO
|
||||
using Type = Lexer::Token; //TODO
|
||||
|
||||
//Common base of all AST nodes (statements and declarations).
class StatDecl
{
public:
    //virtual so derived nodes are destroyed correctly through a StatDecl pointer
    virtual ~StatDecl() = default;
};
|
||||
|
||||
class Stat : public StatDecl //statement (expressions, control, block)
|
||||
{
|
||||
};
|
||||
|
||||
class Block : public Stat //block
|
||||
{
|
||||
vector<uptr<StatDecl>> mStatDecls;
|
||||
public:
|
||||
explicit Block(vector<uptr<StatDecl>> statDecls)
|
||||
: mStatDecls(move(statDecls)) {}
|
||||
};
|
||||
|
||||
class Expr : public Stat //expression
|
||||
{
|
||||
public:
|
||||
virtual ~Expr() {}
|
||||
};
|
||||
|
||||
class Return : public Stat
|
||||
{
|
||||
uptr<Expr> mExpr;
|
||||
public:
|
||||
explicit Return(uptr<Expr> expr)
|
||||
: mExpr(move(expr)) {}
|
||||
};
|
||||
|
||||
class Decl : public StatDecl //declaration (variables/types)
|
||||
{
|
||||
};
|
||||
|
||||
class Builtin : public Decl //built-in declaration (int x)
|
||||
{
|
||||
Type mType;
|
||||
string mId;
|
||||
public:
|
||||
explicit Builtin(Type type, const string & id)
|
||||
: mType(type), mId(id) {}
|
||||
};
|
||||
|
||||
class Struct : public Decl //struct (can contain code, not just declarations)
|
||||
{
|
||||
string mId;
|
||||
uptr<Block> mBlock;
|
||||
public:
|
||||
explicit Struct(const string & id, uptr<Block> block)
|
||||
: mId(id), mBlock(move(block)) {}
|
||||
};
|
||||
};
|
|
@ -22,9 +22,11 @@
|
|||
<ClCompile Include="filehelper.cpp" />
|
||||
<ClCompile Include="lexer.cpp" />
|
||||
<ClCompile Include="main.cpp" />
|
||||
<ClCompile Include="parser.cpp" />
|
||||
<ClCompile Include="stringutils.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="ast.h" />
|
||||
<ClInclude Include="dynamicmem.h" />
|
||||
<ClInclude Include="filehelper.h" />
|
||||
<ClInclude Include="handle.h" />
|
||||
|
@ -34,6 +36,9 @@
|
|||
<ClInclude Include="stringutils.h" />
|
||||
<ClInclude Include="testfiles.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="parser.h" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{B0411C78-2F06-49E0-8DE9-5C52A466F5DE}</ProjectGuid>
|
||||
<RootNamespace>btparser</RootNamespace>
|
||||
|
|
|
@ -27,6 +27,9 @@
|
|||
<ClCompile Include="lexer.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="parser.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="filehelper.h">
|
||||
|
@ -53,5 +56,13 @@
|
|||
<ClInclude Include="lexer.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="ast.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="parser.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -51,14 +51,14 @@ bool Lexer::DoLexing(std::vector<TokenState> & tokens, std::string & error)
|
|||
{
|
||||
auto token = getToken();
|
||||
mState.Token = token;
|
||||
if (token == tok_eof)
|
||||
break;
|
||||
if (token == tok_error)
|
||||
{
|
||||
error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mError.c_str());
|
||||
return false;
|
||||
}
|
||||
tokens.push_back(mState);
|
||||
if (token == tok_eof)
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ public:
|
|||
enum Token
|
||||
{
|
||||
//status tokens
|
||||
tok_eof = -10000,
|
||||
tok_eof,
|
||||
tok_error,
|
||||
|
||||
//keywords
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "testfiles.h"
|
||||
#include "lexer.h"
|
||||
#include "filehelper.h"
|
||||
#include "parser.h"
|
||||
|
||||
bool TestLexer(Lexer & lexer, const std::string & filename)
|
||||
{
|
||||
|
@ -84,13 +85,27 @@ void DebugLexerTests(bool output = true)
|
|||
DebugLexer(lexer, file, output);
|
||||
}
|
||||
|
||||
bool DebugParser(const std::string & filename)
|
||||
{
|
||||
Parser parser;
|
||||
std::string error;
|
||||
if(!parser.ParseFile("tests\\" + filename, error))
|
||||
{
|
||||
printf("ParseFile failed: %s\n", error.c_str());
|
||||
return false;
|
||||
}
|
||||
puts("ParseFile success!");
|
||||
return true;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
//GenerateExpectedTests();
|
||||
auto ticks = GetTickCount();
|
||||
DebugParser("simple.bt");
|
||||
//Lexer lexer;
|
||||
//DebugLexer(lexer, "AndroidManifestTemplate.bt", false);
|
||||
RunLexerTests();
|
||||
//RunLexerTests();
|
||||
printf("finished in %ums\n", GetTickCount() - ticks);
|
||||
system("pause");
|
||||
return 0;
|
||||
|
|
|
@ -0,0 +1,178 @@
|
|||
#include "parser.h"
|
||||
#include "stringutils.h"
|
||||
|
||||
using namespace AST;
|
||||
|
||||
//Constructs a parser whose current token is a value-initialized TokenState.
Parser::Parser()
    : CurToken()
{
}
|
||||
|
||||
bool Parser::ParseFile(const string & filename, string & error)
|
||||
{
|
||||
if (!mLexer.ReadInputFile(filename))
|
||||
{
|
||||
error = "failed to read input file";
|
||||
return false;
|
||||
}
|
||||
if (!mLexer.DoLexing(mTokens, error))
|
||||
return false;
|
||||
CurToken = mTokens[0];
|
||||
mBinaryTemplate = ParseBinaryTemplate();
|
||||
return !!mBinaryTemplate;
|
||||
}
|
||||
|
||||
void Parser::NextToken()
|
||||
{
|
||||
if (mIndex < mTokens.size() - 1)
|
||||
{
|
||||
mIndex++;
|
||||
CurToken = mTokens[mIndex];
|
||||
}
|
||||
}
|
||||
|
||||
void Parser::ReportError(const std::string & error)
|
||||
{
|
||||
mErrors.push_back(Error(error));
|
||||
}
|
||||
|
||||
//Parses the whole token stream as a sequence of statements/declarations.
//returns: the top-level Block, or nullptr when any statement/declaration
//         fails to parse (the failing rule has already reported the error)
//NOTE(review): relies on the token list ending with tok_eof (DoLexing appears
//to push the EOF token before it stops) - confirm against lexer.cpp.
uptr<Block> Parser::ParseBinaryTemplate()
{
    vector<uptr<StatDecl>> statDecls;
    //stop at end of input instead of waiting for ParseStatDecl to fail, which
    //recorded a bogus error on every successful parse
    while (CurToken.Token != Lexer::tok_eof)
    {
        auto statDecl = ParseStatDecl();
        if (!statDecl)
            return nullptr; //do not turn a syntax error into a valid template
        statDecls.push_back(move(statDecl));
    }
    return make_uptr<Block>(move(statDecls));
}
|
||||
|
||||
//Parses either a declaration or a statement (tried in that order).
//Reports an error and returns nullptr when neither matches.
uptr<StatDecl> Parser::ParseStatDecl()
{
    uptr<StatDecl> node = ParseDecl();
    if (!node)
        node = ParseStat();
    if (!node)
        ReportError("failed to parse StatDecl");
    return node;
}
|
||||
|
||||
//Parses a statement: a block, an expression or a return (tried in that order).
//Reports an error and returns nullptr when none of them matches.
uptr<Stat> Parser::ParseStat()
{
    uptr<Stat> node = ParseBlock();
    if (!node)
        node = ParseExpr();
    if (!node)
        node = ParseReturn();
    if (!node)
        ReportError("failed to parse Stat");
    return node;
}
|
||||
|
||||
//Parses a block: '{' followed by any number of statements/declarations and a
//closing '}'.
//returns: the parsed Block; nullptr without an error when the current token is
//         not '{'; nullptr with an error when the block contents are invalid
//         or the block is not closed.
uptr<Block> Parser::ParseBlock()
{
    if (CurToken.Token != Lexer::tok_bropen) //'{'
        return nullptr;
    NextToken();

    vector<uptr<StatDecl>> statDecls;
    while (CurToken.Token != Lexer::tok_brclose) //'}'
    {
        //an empty result covers both bad contents and a missing '}', because
        //no rule matches once the input has run out
        auto statDecl = ParseStatDecl();
        if (!statDecl)
        {
            ReportError("failed to parse Block");
            return nullptr;
        }
        statDecls.push_back(move(statDecl));
    }
    NextToken(); //consume '}'
    return make_uptr<Block>(move(statDecls));
}
|
||||
|
||||
//Placeholder: expressions are not parsed yet, so this always yields an empty
//pointer and consumes no tokens.
uptr<Expr> Parser::ParseExpr()
{
    return uptr<Expr>();
}
|
||||
|
||||
//Parses "return <expr>".
//returns: the Return node; nullptr without an error when the current token is
//         not the return keyword; nullptr with an error when the expression
//         after the keyword cannot be parsed.
uptr<Return> Parser::ParseReturn()
{
    if (CurToken.Token != Lexer::tok_return)
        return nullptr;
    NextToken();

    auto value = ParseExpr();
    if (value)
        return make_uptr<Return>(move(value));

    ReportError("failed to parse Return (ParseExpr failed)");
    return nullptr;
}
|
||||
|
||||
//Parses a declaration: a built-in variable or a struct (tried in that order).
//Returns nullptr when neither matches; no error is reported here.
uptr<Decl> Parser::ParseDecl()
{
    uptr<Decl> node = ParseBuiltin();
    if (!node)
        node = ParseStruct();
    return node;
}
|
||||
|
||||
//Parses "<type> <identifier> ;" where only the uint type is recognized so far.
//returns: the Builtin node; nullptr without an error when the current token is
//         not a recognized type; nullptr with an error when the identifier or
//         the semicolon is missing.
uptr<Builtin> Parser::ParseBuiltin()
{
    if (CurToken.Token != Lexer::tok_uint) //TODO: properly handle types
        return nullptr;
    auto builtinType = CurToken.Token;
    NextToken();

    if (CurToken.Token != Lexer::tok_identifier)
    {
        ReportError("failed to parse Builtin (no identifier)");
        return nullptr;
    }
    auto name = CurToken.IdentifierStr;
    NextToken();

    if (CurToken.Token != Lexer::tok_semic)
    {
        ReportError("failed to parse Builtin (no semicolon)");
        return nullptr;
    }
    NextToken(); //consume ';'

    return make_uptr<Builtin>(builtinType, name);
}
|
||||
|
||||
//Parses "struct [identifier] <block>"; the name is optional and stays empty
//when it is absent.
//returns: the Struct node; nullptr without an error when the current token is
//         not the struct keyword; nullptr with an error when the body block
//         cannot be parsed.
uptr<Struct> Parser::ParseStruct()
{
    if (CurToken.Token != Lexer::tok_struct)
        return nullptr;
    NextToken();

    string name;
    if (CurToken.Token == Lexer::tok_identifier)
    {
        name = CurToken.IdentifierStr;
        NextToken();
    }

    auto body = ParseBlock();
    if (body)
        return make_uptr<Struct>(name, move(body));

    ReportError("failed to parse Struct (ParseBlock)");
    return nullptr;
}
|
|
@ -0,0 +1,42 @@
|
|||
#pragma once
|
||||
|
||||
#include "lexer.h"
|
||||
#include "ast.h"
|
||||
|
||||
class Parser
|
||||
{
|
||||
public:
|
||||
struct Error
|
||||
{
|
||||
explicit Error(const std::string & text)
|
||||
: text(text) {}
|
||||
|
||||
std::string text;
|
||||
};
|
||||
|
||||
explicit Parser();
|
||||
bool ParseFile(const std::string & filename, std::string & error);
|
||||
|
||||
private:
|
||||
Lexer mLexer;
|
||||
std::vector<Lexer::TokenState> mTokens;
|
||||
size_t mIndex = 0;
|
||||
AST::uptr<AST::Block> mBinaryTemplate = nullptr;
|
||||
std::vector<Error> mErrors;
|
||||
|
||||
Lexer::TokenState CurToken;
|
||||
void NextToken();
|
||||
void ReportError(const std::string & error);
|
||||
|
||||
AST::uptr<AST::Block> ParseBinaryTemplate();
|
||||
AST::uptr<AST::StatDecl> ParseStatDecl();
|
||||
|
||||
AST::uptr<AST::Stat> ParseStat();
|
||||
AST::uptr<AST::Block> ParseBlock();
|
||||
AST::uptr<AST::Expr> ParseExpr();
|
||||
AST::uptr<AST::Return> ParseReturn();
|
||||
|
||||
AST::uptr<AST::Decl> ParseDecl();
|
||||
AST::uptr<AST::Builtin> ParseBuiltin();
|
||||
AST::uptr<AST::Struct> ParseStruct();
|
||||
};
|
|
@ -0,0 +1 @@
|
|||
uint x;
|
Loading…
Reference in New Issue