From 0f4eb113a97c5f13b7e9ec2e1097408be655721c Mon Sep 17 00:00:00 2001 From: mrexodia Date: Tue, 7 Jun 2016 03:17:56 +0200 Subject: [PATCH] basics of AST and parsing --- btparser/ast.h | 75 +++++++++++++ btparser/btparser.vcxproj | 5 + btparser/btparser.vcxproj.filters | 11 ++ btparser/lexer.cpp | 4 +- btparser/lexer.h | 2 +- btparser/main.cpp | 17 ++- btparser/parser.cpp | 178 ++++++++++++++++++++++++++++++ btparser/parser.h | 42 +++++++ btparser/tests/simple.bt | 1 + 9 files changed, 331 insertions(+), 4 deletions(-) create mode 100644 btparser/ast.h create mode 100644 btparser/parser.cpp create mode 100644 btparser/parser.h create mode 100644 btparser/tests/simple.bt diff --git a/btparser/ast.h b/btparser/ast.h new file mode 100644 index 0000000..5d35d12 --- /dev/null +++ b/btparser/ast.h @@ -0,0 +1,75 @@ +#pragma once + +#include "lexer.h" +#include + +namespace AST +{ + using namespace std; + + template + using uptr = unique_ptr; + + template + static typename enable_if::value, unique_ptr>::type make_uptr(Args &&... args) + { + return uptr(new T(std::forward(args)...)); + } + + using Operator = Lexer::Token; //TODO + using Type = Lexer::Token; //TODO + + class StatDecl //base class for every node + { + public: + virtual ~StatDecl() {} + }; + + class Stat : public StatDecl //statement (expressions, control, block) + { + }; + + class Block : public Stat //block + { + vector> mStatDecls; + public: + explicit Block(vector> statDecls) + : mStatDecls(move(statDecls)) {} + }; + + class Expr : public Stat //expression + { + public: + virtual ~Expr() {} + }; + + class Return : public Stat + { + uptr mExpr; + public: + explicit Return(uptr expr) + : mExpr(move(expr)) {} + }; + + class Decl : public StatDecl //declaration (variables/types) + { + }; + + class Builtin : public Decl //built-in declaration (int x) + { + Type mType; + string mId; + public: + explicit Builtin(Type type, const string & id) + : mType(type), mId(id) {} + }; + + class Struct : public Decl //struct (can contain code, not just declarations) + { + string mId; + uptr mBlock; + public: + explicit Struct(const string & id, uptr block) + : mId(id), mBlock(move(block)) {} + }; +}; \ No newline at end of file diff --git a/btparser/btparser.vcxproj b/btparser/btparser.vcxproj index e72d0c2..af2e056 100644 --- a/btparser/btparser.vcxproj +++ b/btparser/btparser.vcxproj @@ -22,9 +22,11 @@ + + @@ -34,6 +36,9 @@ + + + {B0411C78-2F06-49E0-8DE9-5C52A466F5DE} btparser diff --git a/btparser/btparser.vcxproj.filters b/btparser/btparser.vcxproj.filters index 19271c0..526b5cc 100644 --- a/btparser/btparser.vcxproj.filters +++ b/btparser/btparser.vcxproj.filters @@ -27,6 +27,9 @@ Source Files + + Source Files + @@ -53,5 +56,13 @@ Header Files + + Header Files + + + + + Header Files + \ No newline at end of file diff --git a/btparser/lexer.cpp b/btparser/lexer.cpp index 81dba36..dd7daf5 100644 --- a/btparser/lexer.cpp +++ b/btparser/lexer.cpp @@ -51,14 +51,14 @@ bool Lexer::DoLexing(std::vector & tokens, std::string & error) { auto token = getToken(); mState.Token = token; - if (token == tok_eof) - break; if (token == tok_error) { error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mError.c_str()); return false; } tokens.push_back(mState); + if (token == tok_eof) + break; } return true; } diff --git a/btparser/lexer.h b/btparser/lexer.h index 3e55583..11c55aa 100644 --- a/btparser/lexer.h +++ b/btparser/lexer.h @@ -11,7 +11,7 @@ public: enum Token { //status tokens - tok_eof = -10000, + tok_eof, tok_error, //keywords diff --git a/btparser/main.cpp b/btparser/main.cpp index 365e73c..0a42ad1 100644 --- a/btparser/main.cpp +++ b/btparser/main.cpp @@ -3,6 +3,7 @@ #include "testfiles.h" #include "lexer.h" #include "filehelper.h" +#include "parser.h" bool TestLexer(Lexer & lexer, const std::string & filename) { @@ -84,13 +85,27 @@ void DebugLexerTests(bool output = true) DebugLexer(lexer, file, output); } +bool DebugParser(const std::string & filename) +{ + Parser parser; + std::string error; + if(!parser.ParseFile("tests\\" + filename, error)) + { + printf("ParseFile failed: %s\n", error.c_str()); + return false; + } + puts("ParseFile success!"); + return true; +} + int main() { //GenerateExpectedTests(); auto ticks = GetTickCount(); + DebugParser("simple.bt"); //Lexer lexer; //DebugLexer(lexer, "AndroidManifestTemplate.bt", false); - RunLexerTests(); + //RunLexerTests(); printf("finished in %ums\n", GetTickCount() - ticks); system("pause"); return 0; diff --git a/btparser/parser.cpp b/btparser/parser.cpp new file mode 100644 index 0000000..9c39115 --- /dev/null +++ b/btparser/parser.cpp @@ -0,0 +1,178 @@ +#include "parser.h" +#include "stringutils.h" + +using namespace AST; + +Parser::Parser() + : CurToken(Lexer::TokenState()) +{ +} + +bool Parser::ParseFile(const string & filename, string & error) +{ + if (!mLexer.ReadInputFile(filename)) + { + error = "failed to read input file"; + return false; + } + if (!mLexer.DoLexing(mTokens, error)) + return false; + CurToken = mTokens[0]; + mBinaryTemplate = ParseBinaryTemplate(); + return !!mBinaryTemplate; +} + +void Parser::NextToken() +{ + if (mIndex < mTokens.size() - 1) + { + mIndex++; + CurToken = mTokens[mIndex]; + } +} + +void Parser::ReportError(const std::string & error) +{ + mErrors.push_back(Error(error)); +} + +uptr Parser::ParseBinaryTemplate() +{ + vector> statDecls; + while (true) + { + auto statDecl = ParseStatDecl(); + if (!statDecl) + break; + statDecls.push_back(move(statDecl)); + } + return make_uptr(move(statDecls)); +} + +uptr Parser::ParseStatDecl() +{ + auto decl = ParseDecl(); + if (decl) + return move(decl); + + auto stat = ParseStat(); + if (stat) + return move(stat); + + ReportError("failed to parse StatDecl"); + return nullptr; +} + +uptr Parser::ParseStat() +{ + auto block = ParseBlock(); + if (block) + return move(block); + + auto expr = ParseExpr(); + if (expr) + return move(expr); + + auto ret = ParseReturn(); + if (ret) + return move(ret); + + ReportError("failed to parse Stat"); + return nullptr; +} + +uptr Parser::ParseBlock() +{ + if (CurToken.Token != Lexer::tok_bropen) //'{' + return nullptr; + NextToken(); + + vector> statDecls; + + if (CurToken.Token == Lexer::tok_brclose) //'}' + { + NextToken(); + return make_uptr(move(statDecls)); + } + + ReportError("failed to parse Block"); + return nullptr; +} + +uptr Parser::ParseExpr() +{ + return nullptr; +} + +uptr Parser::ParseReturn() +{ + if (CurToken.Token == Lexer::tok_return) + { + NextToken(); + auto expr = ParseExpr(); + if (!expr) + { + ReportError("failed to parse Return (ParseExpr failed)"); + return nullptr; + } + return make_uptr(move(expr)); + } + return nullptr; +} + +uptr Parser::ParseDecl() +{ + auto builtin = ParseBuiltin(); + if (builtin) + return move(builtin); + auto stru = ParseStruct(); + if (stru) + return move(stru); + return nullptr; +} + +uptr Parser::ParseBuiltin() +{ + if (CurToken.Token == Lexer::tok_uint) //TODO: properly handle types + { + auto type = CurToken.Token; + NextToken(); + if (CurToken.Token != Lexer::tok_identifier) + { + ReportError("failed to parse Builtin (no identifier)"); + return nullptr; + } + auto id = CurToken.IdentifierStr; + NextToken(); + if (CurToken.Token != Lexer::tok_semic) + { + ReportError("failed to parse Builtin (no semicolon)"); + return nullptr; + } + NextToken(); + return make_uptr(type, id); + } + return nullptr; +} + +uptr Parser::ParseStruct() +{ + if (CurToken.Token == Lexer::tok_struct) + { + NextToken(); + string id; + if (CurToken.Token == Lexer::tok_identifier) + { + id = CurToken.IdentifierStr; + NextToken(); + } + auto block = ParseBlock(); + if (!block) + { + ReportError("failed to parse Struct (ParseBlock)"); + return nullptr; + } + return make_uptr(id, move(block)); + } + return nullptr; +} diff --git a/btparser/parser.h b/btparser/parser.h new file mode 100644 index 0000000..90271a9 --- /dev/null +++ b/btparser/parser.h @@ -0,0 +1,42 @@ +#pragma once + +#include "lexer.h" +#include "ast.h" + +class Parser +{ +public: + struct Error + { + explicit Error(const std::string & text) + : text(text) {} + + std::string text; + }; + + explicit Parser(); + bool ParseFile(const std::string & filename, std::string & error); + +private: + Lexer mLexer; + std::vector mTokens; + size_t mIndex = 0; + AST::uptr mBinaryTemplate = nullptr; + std::vector mErrors; + + Lexer::TokenState CurToken; + void NextToken(); + void ReportError(const std::string & error); + + AST::uptr ParseBinaryTemplate(); + AST::uptr ParseStatDecl(); + + AST::uptr ParseStat(); + AST::uptr ParseBlock(); + AST::uptr ParseExpr(); + AST::uptr ParseReturn(); + + AST::uptr ParseDecl(); + AST::uptr ParseBuiltin(); + AST::uptr ParseStruct(); +}; \ No newline at end of file diff --git a/btparser/tests/simple.bt b/btparser/tests/simple.bt new file mode 100644 index 0000000..f3c4d04 --- /dev/null +++ b/btparser/tests/simple.bt @@ -0,0 +1 @@ +uint x; \ No newline at end of file