From 491057fbfc86a89b3000a3f2f836466da766d26c Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Mon, 6 Feb 2023 04:17:25 +0100 Subject: [PATCH] Continue implementing more advanced C features --- btparser/btparser.vcxproj | 12 +- btparser/btparser.vcxproj.filters | 8 - btparser/keywords.h | 40 +- btparser/lexer.h | 7 +- btparser/operators.h | 2 + btparser/parser.cpp | 198 -------- btparser/parser.h | 44 -- btparser/types.h | 1 + btparser/typesparser.cpp | 785 ++++++++++++++++++++---------- 9 files changed, 545 insertions(+), 552 deletions(-) delete mode 100644 btparser/parser.cpp delete mode 100644 btparser/parser.h diff --git a/btparser/btparser.vcxproj b/btparser/btparser.vcxproj index d8e3e0e..dbca24d 100644 --- a/btparser/btparser.vcxproj +++ b/btparser/btparser.vcxproj @@ -21,7 +21,6 @@ - @@ -36,9 +35,6 @@ - - - {B0411C78-2F06-49E0-8DE9-5C52A466F5DE} btparser @@ -48,26 +44,26 @@ Application true - v143 + v142 MultiByte Application true - v143 + v142 MultiByte Application false - v143 + v142 true MultiByte Application false - v143 + v142 true MultiByte diff --git a/btparser/btparser.vcxproj.filters b/btparser/btparser.vcxproj.filters index 0fb66b2..7093496 100644 --- a/btparser/btparser.vcxproj.filters +++ b/btparser/btparser.vcxproj.filters @@ -21,9 +21,6 @@ Source Files - - Source Files - Source Files @@ -60,9 +57,4 @@ Header Files - - - Header Files - - \ No newline at end of file diff --git a/btparser/keywords.h b/btparser/keywords.h index 0bb0898..80b5eec 100644 --- a/btparser/keywords.h +++ b/btparser/keywords.h @@ -21,62 +21,26 @@ DEF_KEYWORD(enum) DEF_KEYWORD(struct) DEF_KEYWORD(typedef) DEF_KEYWORD(sizeof) -DEF_KEYWORD(void) DEF_KEYWORD(union) -DEF_KEYWORD(local) +DEF_KEYWORD(const) +DEF_KEYWORD(void) DEF_KEYWORD(signed) DEF_KEYWORD(unsigned) DEF_KEYWORD(bool) DEF_KEYWORD(char) -DEF_KEYWORD(uchar) DEF_KEYWORD(wchar_t) DEF_KEYWORD(char16_t) DEF_KEYWORD(char32_t) -DEF_KEYWORD(byte) -DEF_KEYWORD(ubyte) DEF_KEYWORD(short) -DEF_KEYWORD(ushort) DEF_KEYWORD(int) -DEF_KEYWORD(uint) DEF_KEYWORD(long) -DEF_KEYWORD(ulong) - -DEF_KEYWORD(int8) -DEF_KEYWORD(uint8) -DEF_KEYWORD(int16) -DEF_KEYWORD(uint16) -DEF_KEYWORD(int32) -DEF_KEYWORD(uint32) -DEF_KEYWORD(int64) -DEF_KEYWORD(uint64) - -DEF_KEYWORD(BOOL) -DEF_KEYWORD(CHAR) -DEF_KEYWORD(BYTE) -DEF_KEYWORD(WORD) -DEF_KEYWORD(DWORD) -DEF_KEYWORD(QWORD) DEF_KEYWORD(float) DEF_KEYWORD(double) -DEF_KEYWORD(string) -DEF_KEYWORD(time_t) -DEF_KEYWORD(quad) - -DEF_KEYWORD(DOSDATE) -DEF_KEYWORD(DOSTIME) -DEF_KEYWORD(FILETIME) -DEF_KEYWORD(OLETIME) -DEF_KEYWORD(UQUAD) -DEF_KEYWORD(LONGLONG) -DEF_KEYWORD(ULONG_PTR) -DEF_KEYWORD(VQUAD) -DEF_KEYWORD(UINT32) DEF_KEYWORD(true) DEF_KEYWORD(false) DEF_KEYWORD(nullptr) -DEF_KEYWORD(NULL) \ No newline at end of file diff --git a/btparser/lexer.h b/btparser/lexer.h index 9db2507..96f4acc 100644 --- a/btparser/lexer.h +++ b/btparser/lexer.h @@ -49,7 +49,12 @@ public: bool IsType() const { - return Token >= tok_signed && Token <= tok_UINT32; + return Token >= tok_void && Token <= tok_double; + } + + bool Is(Lexer::Token token) const + { + return Token == token; } void Clear() diff --git a/btparser/operators.h b/btparser/operators.h index eea8bfc..7733c27 100644 --- a/btparser/operators.h +++ b/btparser/operators.h @@ -10,6 +10,7 @@ DEF_OP_TRIPLE(ass_shl, '<', '<', '=') DEF_OP_TRIPLE(ass_shr, '>', '>', '=') +DEF_OP_TRIPLE(varargs, '.', '.', '.') DEF_OP_DOUBLE(op_inc, '+', '+') DEF_OP_DOUBLE(op_dec, '-', '-') @@ -44,6 +45,7 @@ DEF_OP_SINGLE(tenary, '?') DEF_OP_SINGLE(colon, ':') DEF_OP_SINGLE(assign, '=') DEF_OP_SINGLE(semic, ';') +DEF_OP_SINGLE(dot, '.') DEF_OP_SINGLE(op_mul, '*') DEF_OP_SINGLE(op_div, '/') diff --git a/btparser/parser.cpp b/btparser/parser.cpp deleted file mode 100644 index 08b786a..0000000 --- a/btparser/parser.cpp +++ /dev/null @@ -1,198 +0,0 @@ -#include "parser.h" - -using namespace AST; - -Parser::Parser() - : CurToken(Lexer::TokenState()) -{ -} - -bool Parser::ParseFile(const string & filename, string & error) -{ - if(!mLexer.ReadInputFile(filename)) - { - error = "failed to read input file"; - return false; - } - if (!mLexer.DoLexing(mTokens, error)) - return false; - CurToken = mTokens[0]; - mBinaryTemplate = ParseBinaryTemplate(); - return !!mBinaryTemplate; -} - -bool Parser::ParseString(const std::string& source, std::string& error) -{ - mLexer.SetInputData(source); - if (!mLexer.DoLexing(mTokens, error)) - return false; - CurToken = mTokens[0]; - mBinaryTemplate = ParseBinaryTemplate(); - return !!mBinaryTemplate; -} - -void Parser::NextToken() -{ - if(mIndex < mTokens.size() - 1) - { - mIndex++; - CurToken = mTokens[mIndex]; - } -} - -void Parser::ReportError(const std::string & error) -{ - mErrors.push_back(Error(error)); -} - -uptr Parser::ParseBinaryTemplate() -{ - vector> statDecls; - while(true) - { - auto statDecl = ParseStatDecl(); - if(!statDecl) - break; - statDecls.push_back(move(statDecl)); - } - auto binaryTemplate = make_uptr(move(statDecls)); - if(CurToken.Token != Lexer::tok_eof) - { - ReportError("last token is not EOF"); - return nullptr; - } - return move(binaryTemplate); -} - -uptr Parser::ParseStatDecl() -{ - auto decl = ParseDecl(); - if(decl) - return move(decl); - - auto stat = ParseStat(); - if(stat) - return move(stat); - - ReportError("failed to parse StatDecl"); - return nullptr; -} - -uptr Parser::ParseStat() -{ - auto block = ParseBlock(); - if(block) - return move(block); - - auto expr = ParseExpr(); - if(expr) - return move(expr); - - auto ret = ParseReturn(); - if(ret) - return move(ret); - - ReportError("failed to parse Stat"); - return nullptr; -} - -uptr Parser::ParseBlock() -{ - if(CurToken.Token != Lexer::tok_bropen) //'{' - return nullptr; - NextToken(); - - vector> statDecls; - - if(CurToken.Token == Lexer::tok_brclose) //'}' - { - NextToken(); - return make_uptr(move(statDecls)); - } - - ReportError("failed to parse Block"); - return nullptr; -} - -uptr Parser::ParseExpr() -{ - return nullptr; -} - -uptr Parser::ParseReturn() -{ - if(CurToken.Token == Lexer::tok_return) - { - NextToken(); - auto expr = ParseExpr(); - if(!expr) - { - ReportError("failed to parse Return (ParseExpr failed)"); - return nullptr; - } - return make_uptr(move(expr)); - } - return nullptr; -} - -uptr Parser::ParseDecl() -{ - auto builtin = ParseBuiltinVar(); - if(builtin) - return move(builtin); - auto stru = ParseStruct(); - if(stru) - return move(stru); - return nullptr; -} - -uptr Parser::ParseBuiltinVar() -{ - if(CurToken.Token == Lexer::tok_uint) //TODO: properly handle types - { - auto type = CurToken.Token; - NextToken(); - if(CurToken.Token != Lexer::tok_identifier) - { - ReportError("failed to parse BuiltinVar (no identifier)"); - return nullptr; - } - auto id = CurToken.IdentifierStr; - NextToken(); - if(CurToken.Token != Lexer::tok_semic) - { - ReportError("failed to parse BuiltinVar (no semicolon)"); - return nullptr; - } - NextToken(); - return make_uptr(type, id); - } - return nullptr; -} - -uptr Parser::ParseStruct() -{ - if(CurToken.Token == Lexer::tok_struct) - { - NextToken(); - string id; - if(CurToken.Token == Lexer::tok_identifier) - { - id = CurToken.IdentifierStr; - NextToken(); - } - auto block = ParseBlock(); - if(!block) - { - ReportError("failed to parse Struct (ParseBlock)"); - return nullptr; - } - return make_uptr(id, move(block)); - } - return nullptr; -} - -AST::uptr Parser::ParseStructVar() -{ - return nullptr; -} diff --git a/btparser/parser.h b/btparser/parser.h deleted file mode 100644 index 8f07883..0000000 --- a/btparser/parser.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include "lexer.h" -#include "ast.h" - -class Parser -{ -public: - struct Error - { - explicit Error(const std::string & text) - : text(text) {} - - std::string text; - }; - - explicit Parser(); - bool ParseFile(const std::string & filename, std::string & error); - bool ParseString(const std::string& source, std::string& error); - -private: - Lexer mLexer; - std::vector mTokens; - size_t mIndex = 0; - AST::uptr mBinaryTemplate = nullptr; - std::vector mErrors; - - Lexer::TokenState CurToken; - void NextToken(); - void ReportError(const std::string & error); - - AST::uptr ParseBinaryTemplate(); - AST::uptr ParseStatDecl(); - - AST::uptr ParseStat(); - AST::uptr ParseBlock(); - AST::uptr ParseExpr(); - AST::uptr ParseReturn(); - - AST::uptr ParseDecl(); - AST::uptr ParseBuiltinVar(); - AST::uptr ParseStruct(); - AST::uptr ParseStructVar(); -}; \ No newline at end of file diff --git a/btparser/types.h b/btparser/types.h index 1f8d2bb..5e014d8 100644 --- a/btparser/types.h +++ b/btparser/types.h @@ -39,6 +39,7 @@ namespace Types { std::string name; //Member identifier std::string type; //Type.name + bool isConst = false; //Whether the member is marked as const int arrsize = 0; //Number of elements if Member is an array int offset = -1; //Member offset (only stored for reference) }; diff --git a/btparser/typesparser.cpp b/btparser/typesparser.cpp index 3c619e6..0956a7c 100644 --- a/btparser/typesparser.cpp +++ b/btparser/typesparser.cpp @@ -45,16 +45,311 @@ bool ParseTypes(const std::string& parse, const std::string& owner, std::vector< auto errLine = [&](const Lexer::TokenState& token, const std::string& message) { - errors.push_back(StringUtils::sprintf("[line %zu:%zu] %s", token.CurLine + 1, token.LineIndex, message.c_str())); + auto error = StringUtils::sprintf("[line %zu:%zu] %s", token.CurLine + 1, token.LineIndex, message.c_str()); + errors.push_back(std::move(error)); }; auto eatSemic = [&]() { while (curToken().Token == Lexer::tok_semic) index++; }; + auto parseVariable = [&](const std::vector& tlist, std::string& type, bool& isConst, std::string& name) + { + type.clear(); + isConst = false; + name.clear(); + + bool sawPointer = false; + bool isKeyword = true; + size_t i = 0; + for (; i < tlist.size(); i++) + { + const auto& t = tlist[i]; + if (t.Is(Lexer::tok_const)) + { + isConst = true; + continue; + } + + auto isType = t.IsType(); + if (!isType) + { + isKeyword = false; + } + + if (isType) + { + if (isKeyword) + { + if (!type.empty()) + type += ' '; + type += lexer.TokString(t); + } + else + { + errLine(t, "invalid keyword in type"); + return false; + } + } + else if (t.Is(Lexer::tok_identifier)) + { + if (type.empty()) + { + type = t.IdentifierStr; + } + else if (i + 1 == tlist.size()) + { + name = t.IdentifierStr; + } + else + { + errLine(t, "invalid identifier in type"); + return false; + } + } + else if (t.Is(Lexer::tok_op_mul)) + { + if (type.empty()) + { + errLine(t, "unexpected * in type"); + return false; + } + + if (sawPointer && type.back() != '*') + { + errLine(t, "unexpected * in type"); + return false; + } + + // Apply the pointer to the type on the left + type += '*'; + sawPointer = true; + } + else + { + errLine(t, "invalid token in type"); + return false; + } + } + if (type.empty()) + __debugbreak(); + return true; + }; + auto parseFunction = [&](std::vector& rettypes, Function& fn, bool ptr) + { + if (rettypes.empty()) + { + errLine(curToken(), "expected return type before function pointer type"); + return false; + } + + // TODO: calling conventions + + std::string retname; + bool retconst = false; + if (!parseVariable(rettypes, fn.rettype, retconst, retname)) + return false; + + if (ptr) + { + if (!retname.empty()) + { + errLine(rettypes.back(), "invalid return type in function pointer"); + return false; + } + + if (!isToken(Lexer::tok_op_mul)) + { + errLine(curToken(), "expected * in function pointer type"); + return false; + } + index++; + + if (!isToken(Lexer::tok_identifier)) + { + errLine(curToken(), "expected identifier in function pointer type"); + return false; + } + fn.name = lexer.TokString(curToken()); + index++; + + if (!isToken(Lexer::tok_parclose)) + { + errLine(curToken(), "expected ) after function pointer type name"); + return false; + } + index++; + + if (!isToken(Lexer::tok_paropen)) + { + errLine(curToken(), "expected ( for start of parameter list in function pointer type"); + return false; + } + index++; + } + else if (retname.empty()) + { + errLine(rettypes.back(), "function name cannot be empty"); + return false; + } + else + { + fn.name = retname; + } + + std::vector tlist; + auto startToken = curToken(); + auto finalizeArgument = [&]() + { + Member am; + if (!parseVariable(tlist, am.type, am.isConst, am.name)) + return false; + fn.args.push_back(am); + tlist.clear(); + startToken = curToken(); + return true; + }; + while (!isToken(Lexer::tok_parclose)) + { + if (isToken(Lexer::tok_comma)) + { + index++; + if (!finalizeArgument()) + return false; + } + + const auto& t = curToken(); + if (t.IsType() || t.Is(Lexer::tok_identifier) || t.Is(Lexer::tok_const)) + { + index++; + + // Primitive type + tlist.push_back(t); + } + else if (t.Is(Lexer::tok_op_mul)) + { + // Pointer to the type on the left + if (tlist.empty()) + { + errLine(curToken(), "unexpected * in function type argument list"); + return false; + } + index++; + + tlist.push_back(t); + } + else if (isTokenList({ Lexer::tok_subopen, Lexer::tok_subclose })) + { + if (tlist.empty()) + { + errLine(curToken(), "unexpected [ in function type argument list"); + return false; + } + index += 2; + + Lexer::TokenState fakePtr; + fakePtr.Token = Lexer::tok_op_mul; + fakePtr.CurLine = t.CurLine; + fakePtr.LineIndex = t.LineIndex; + if (tlist.size() > 1 && tlist.back().Is(Lexer::tok_identifier)) + { + tlist.insert(tlist.end() - 1, fakePtr); + } + else + { + tlist.push_back(fakePtr); + } + } + else if (t.Is(Lexer::tok_varargs)) + { + if (!tlist.empty()) + { + errLine(t, "unexpected ... in function type argument list"); + return false; + } + + index++; + if (!isToken(Lexer::tok_parclose)) + { + errLine(curToken(), "expected ) after ... in function type argument list"); + return false; + } + + Member am; + am.type = "..."; + fn.args.push_back(am); + break; + } + else if (t.Is(Lexer::tok_paropen)) + { + // TODO: support function pointers (requires recursion) + } + else + { + errLine(curToken(), "unsupported token in function type argument list"); + return false; + } + } + index++; + + if (tlist.empty()) + { + // Do nothing + } + else if (tlist.size() == 1 && tlist[0].Token == Lexer::tok_void) + { + if (!fn.args.empty()) + { + errLine(tlist[0], "invalid argument type: void"); + return false; + } + return true; + } + else if (!finalizeArgument()) + { + return false; + } + + if (!isToken(Lexer::tok_semic)) + { + errLine(curToken(), "expected ; after function type"); + return false; + } + eatSemic(); + + return true; + }; auto parseMember = [&](StructUnion& su) { - std::vector memToks; + Member m; + bool sawPointer = false; + std::vector tlist; + auto startToken = curToken(); + + auto finalizeMember = [&]() + { + if (tlist.size() < 2) + { + errLine(startToken, "not enough tokens in member"); + return false; + } + + if (!parseVariable(tlist, m.type, m.isConst, m.name)) + return false; + + if (m.type == "void") + { + errLine(startToken, "void is not a valid member type"); + return false; + } + + if (m.type.empty() || m.name.empty()) + __debugbreak(); + + su.members.push_back(m); + return true; + }; + while (!isToken(Lexer::tok_semic)) { if (isToken(Lexer::tok_eof)) @@ -62,88 +357,122 @@ bool ParseTypes(const std::string& parse, const std::string& owner, std::vector< errLine(curToken(), "unexpected eof in member"); return false; } - memToks.push_back(curToken()); - index++; - } - if (memToks.empty()) - { - errLine(curToken(), "unexpected ; in member"); - return false; - } - eatSemic(); - if (memToks.size() >= 2) //at least type name; - { - Member m; - for (size_t i = 0; i < memToks.size(); i++) + + if (isToken(Lexer::tok_struct) || isToken(Lexer::tok_union) || isToken(Lexer::tok_enum)) { - const auto& t = memToks[i]; - if (t.Token == Lexer::tok_subopen) + if (tlist.empty() && getToken(index + 1).Token == Lexer::tok_identifier) { - if (i + 1 >= memToks.size()) - { - errLine(memToks.back(), "unexpected end after ["); - return false; - } - if (memToks[i + 1].Token != Lexer::tok_number) - { - errLine(memToks[i + 1], "expected number token"); - return false; - } - m.arrsize = int(memToks[i + 1].NumberVal); - if (i + 2 >= memToks.size()) - { - errLine(memToks.back(), "unexpected end, expected ]"); - return false; - } - if (memToks[i + 2].Token != Lexer::tok_subclose) - { - errLine(memToks[i + 2], StringUtils::sprintf("expected ], got %s", lexer.TokString(memToks[i + 2]).c_str())); - return false; - } - if (i + 2 != memToks.size() - 1) - { - errLine(memToks[i + 3], "too many tokens"); - return false; - } - break; - } - else if (i + 1 == memToks.size() || - memToks[i + 1].Token == Lexer::tok_subopen || - memToks[i+1].Token == Lexer::tok_comma) - { - m.name = lexer.TokString(memToks[i]); - } - else if (t.Token == Lexer::tok_comma) //uint32_t a,b; - { - // Flush the current member, inherit the type and continue - su.members.push_back(m); - auto cm = Member(); - cm.type = m.type; - while (!cm.type.empty() && cm.type.back() == '*') - cm.type.pop_back(); - m = cm; - } - else if (!t.IsType() && - t.Token != Lexer::tok_op_mul && - t.Token != Lexer::tok_identifier && - t.Token != Lexer::tok_void) - { - errLine(t, StringUtils::sprintf("token %s is not a type...", lexer.TokString(t).c_str())); - return false; + index++; } else { - if (!m.type.empty() && t.Token != Lexer::tok_op_mul) - m.type.push_back(' '); - m.type += lexer.TokString(t); + errLine(curToken(), "unsupported struct/union/enum in member"); + return false; } } - //dprintf("member: %s %s;\n", m.type.c_str(), m.name.c_str()); - su.members.push_back(m); - return true; + + const auto& t = curToken(); + if (t.IsType() || t.Is(Lexer::tok_identifier) || t.Is(Lexer::tok_const)) + { + index++; + // Primitive type / name + tlist.push_back(t); + } + else if (t.Is(Lexer::tok_op_mul)) + { + // Pointer to the type on the left + if (tlist.empty()) + { + errLine(curToken(), "unexpected * in member"); + return false; + } + + if (sawPointer && tlist.back().Token != Lexer::tok_op_mul) + { + errLine(curToken(), "unexpected * in member"); + return false; + } + + index++; + + tlist.push_back(t); + sawPointer = true; + } + else if (t.Is(Lexer::tok_subopen)) + { + index++; + + // Array + if (!isToken(Lexer::tok_number)) + { + errLine(curToken(), "expected number token after array"); + return false; + } + m.arrsize = (int)curToken().NumberVal; + index++; + + if (!isToken(Lexer::tok_subclose)) + { + errLine(curToken(), "expected ] after array size"); + return false; + } + index++; + + break; + } + else if (t.Is(Lexer::tok_paropen)) + { + index++; + + // Function pointer type + Function fn; + if (!parseFunction(tlist, fn, true)) + { + return false; + } + // TODO: put the function somewhere + + printf("TODO function pointer: %s\n", fn.name.c_str()); + + return true; + } + else if (t.Is(Lexer::tok_comma)) + { + // Comma-separated members + index++; + + if (!finalizeMember()) + return false; + + // Remove the name from the type + if (tlist.back().Token != Lexer::tok_identifier) + __debugbreak(); + tlist.pop_back(); + + // Remove the pointer from the type + while (!tlist.empty() && tlist.back().Token == Lexer::tok_op_mul) + tlist.pop_back(); + sawPointer = false; + + m = Member(); + } + else + { + __debugbreak(); + } } - errLine(memToks.back(), "not enough tokens for member"); - return false; + + if (!isToken(Lexer::tok_semic)) + { + errLine(curToken(), "expected ; after member"); + return false; + } + eatSemic(); + + if (!finalizeMember()) + return false; + + return true; }; auto parseStructUnion = [&]() { @@ -177,8 +506,8 @@ bool ParseTypes(const std::string& parse, const std::string& owner, std::vector< if (!parseMember(su)) return false; } - index++; //eat tok_brclose - //dprintf("%s %s, members: %d\n", su.isunion ? "union" : "struct", su.name.c_str(), int(su.members.size())); + index++; + model.structUnions.push_back(su); if (!isToken(Lexer::tok_semic)) { @@ -213,8 +542,7 @@ bool ParseTypes(const std::string& parse, const std::string& owner, std::vector< { e.name = lexer.TokString(curToken()); index += 2; - if (e.name == "BNFunctionGraphType") - __debugbreak(); + while (!isToken(Lexer::tok_brclose)) { if (isToken(Lexer::tok_eof)) @@ -307,175 +635,77 @@ bool ParseTypes(const std::string& parse, const std::string& owner, std::vector< { // TODO: support "typedef struct foo { members... };" // TODO: support "typedef enum foo { members... };" + if (isToken(Lexer::tok_typedef)) { index++; - std::vector tdList; - while (true) + auto startToken = curToken(); + + bool sawPointer = false; + std::vector tlist; + while (!isToken(Lexer::tok_semic)) { if (isToken(Lexer::tok_eof)) { errLine(curToken(), "unexpected eof in typedef"); return false; } - if (isToken(Lexer::tok_semic)) + + if (isToken(Lexer::tok_struct) || isToken(Lexer::tok_union) || isToken(Lexer::tok_enum)) { - index++; - __debugbreak(); - break; - } - if (isToken(Lexer::tok_struct) || isToken(Lexer::tok_enum)) - { - // TODO - __debugbreak(); + if (tlist.empty() && getToken(index + 1).Token == Lexer::tok_identifier) + { + index++; + } + else + { + errLine(curToken(), "unsupported struct/union/enum in typedef"); + return false; + } } const auto& t = curToken(); - if (t.IsType() || t.Token == Lexer::tok_identifier || t.Token == Lexer::tok_void) + if (t.IsType() || t.Token == Lexer::tok_identifier || t.Token == Lexer::tok_const) { // Primitive type index++; - tdList.push_back(lexer.TokString(t)); + tlist.push_back(t); } else if (t.Token == Lexer::tok_op_mul) { // Pointer to the type on the left - if (tdList.empty()) + if (tlist.empty()) { - errLine(curToken(), "unexpected * in function typedef"); + errLine(curToken(), "unexpected * in member"); return false; } + + if (sawPointer && tlist.back().Token != Lexer::tok_op_mul) + { + errLine(curToken(), "unexpected * in member"); + return false; + } + + tlist.push_back(t); + sawPointer = true; + index++; - tdList.back().push_back('*'); } else if (t.Token == Lexer::tok_paropen) { // Function pointer type - if (tdList.empty()) - { - errLine(curToken(), "expected return type before function typedef"); - return false; - } - // TODO: calling conventions + index++; - if (!isToken(Lexer::tok_op_mul)) - { - errLine(curToken(), "expected * in function typedef"); - return false; - } - index++; - if (!isToken(Lexer::tok_identifier)) - { - errLine(curToken(), "expected identifier in function typedef"); - return false; - } Function fn; - fn.name = lexer.TokString(curToken()); - index++; - - if (!isToken(Lexer::tok_parclose)) + if (!parseFunction(tlist, fn, true)) { - errLine(curToken(), "expected ) after function typedef name"); return false; } - index++; - if (!isToken(Lexer::tok_paropen)) - { - errLine(curToken(), "expected ( for start of parameter list in function typedef"); - return false; - } - index++; + // TODO: put the function somewhere - for (const auto& type : tdList) - { - if (!fn.rettype.empty()) - fn.rettype += ' '; - fn.rettype += type; - } - - Member arg; - while (!isToken(Lexer::tok_parclose)) - { - if (!fn.args.empty()) - { - if (isToken(Lexer::tok_comma)) - { - index++; - fn.args.push_back(arg); - } - else - { - errLine(curToken(), "expected comma in function typedef argument list"); - return false; - } - } - - const auto& t = curToken(); - if (t.Token == Lexer::tok_void) - { - // empty argument list - index++; - if (!fn.args.empty()) - { - errLine(t, "void only allowed in an empty function typedef argument list"); - return false; - } - if (!isToken(Lexer::tok_parclose)) - { - errLine(curToken(), "expected ) after void in function typedef argument list"); - return false; - } - break; - } - else if (t.IsType() || t.Token == Lexer::tok_identifier) - { - // Primitive type - index++; - if (!arg.type.empty()) - { - if (arg.type.back() == '*') - { - errLine(t, "unexpected type after * in function typedef argument list"); - return false; - } - arg.type.push_back(' '); - } - arg.type += lexer.TokString(t); - } - else if (t.Token == Lexer::tok_op_mul) - { - // Pointer to the type on the left - if (arg.type.empty()) - { - errLine(curToken(), "unexpected * in function typedef argument list"); - return false; - } - index++; - fn.args.back().type.push_back('*'); - } - else - { - errLine(curToken(), "unsupported token in function typedef argument list"); - return false; - } - } - index++; - - if (!arg.type.empty()) - { - fn.args.push_back(arg); - } - - if (!isToken(Lexer::tok_semic)) - { - errLine(curToken(), "expected ; after function typedef"); - return false; - } - eatSemic(); - - // TODO: put the fn somewhere + printf("TODO function pointer: %s\n", fn.name.c_str()); return true; } @@ -484,53 +714,96 @@ bool ParseTypes(const std::string& parse, const std::string& owner, std::vector< __debugbreak(); } } + eatSemic(); - __debugbreak(); - - std::vector tdefToks; - while (!isToken(Lexer::tok_semic)) + if (tlist.size() < 2) { - - tdefToks.push_back(curToken()); - index++; - } - if (tdefToks.empty()) - { - errLine(curToken(), "unexpected ; in typedef"); + errLine(startToken, "not enough tokens in typedef"); return false; } - eatSemic(); - if (tdefToks.size() >= 2) //at least typedef a b; - { - Member tm; - tm.name = lexer.TokString(tdefToks[tdefToks.size() - 1]); - tdefToks.pop_back(); - for (auto& t : tdefToks) - { - if (!t.IsType() && - t.Token != Lexer::tok_op_mul && - t.Token != Lexer::tok_identifier && - t.Token != Lexer::tok_void) - { - errLine(t, StringUtils::sprintf("token %s is not a type...", lexer.TokString(t).c_str())); - return false; - } - else - { - if (!tm.type.empty() && t.Token != Lexer::tok_op_mul) - tm.type.push_back(' '); - tm.type += lexer.TokString(t); - } - } - //dprintf("typedef %s:%s\n", tm.type.c_str(), tm.name.c_str()); - model.types.push_back(tm); - return true; - } - errLine(tdefToks.back(), "not enough tokens for typedef"); - return false; + + Member tm; + if (!parseVariable(tlist, tm.type, tm.isConst, tm.name)) + return false; + model.types.push_back(tm); } return true; }; + auto parseFunctionTop = [&]() + { + bool sawPointer = false; + std::vector tlist; + while (!isToken(Lexer::tok_semic)) + { + if (isToken(Lexer::tok_eof)) + { + errLine(curToken(), "unexpected eof in function"); + return false; + } + + if (isToken(Lexer::tok_struct) || isToken(Lexer::tok_union) || isToken(Lexer::tok_enum)) + { + if (tlist.empty() && getToken(index + 1).Token == Lexer::tok_identifier) + { + index++; + } + else + { + errLine(curToken(), "unexpected struct/union/enum in function"); + return false; + } + } + + const auto& t = curToken(); + if (t.IsType() || t.Is(Lexer::tok_identifier) || t.Is(Lexer::tok_const)) + { + index++; + // Primitive type / name + tlist.push_back(t); + } + else if (t.Is(Lexer::tok_op_mul)) + { + // Pointer to the type on the left + if (tlist.empty()) + { + errLine(curToken(), "unexpected * in function"); + return false; + } + + if (sawPointer && tlist.back().Token != Lexer::tok_op_mul) + { + errLine(curToken(), "unexpected * in function"); + return false; + } + + index++; + + tlist.push_back(t); + sawPointer = true; + } + else if (t.Is(Lexer::tok_paropen)) + { + index++; + + // Function pointer type + Function fn; + if (!parseFunction(tlist, fn, false)) + { + return false; + } + // TODO: put the function somewhere + + printf("TODO function: %s\n", fn.name.c_str()); + + return true; + } + else + { + __debugbreak(); + } + } + return false; + }; while (!isToken(Lexer::tok_eof)) { @@ -544,8 +817,10 @@ bool ParseTypes(const std::string& parse, const std::string& owner, std::vector< eatSemic(); if (curIndex == index) { - errLine(curToken(), StringUtils::sprintf("unexpected token %s", lexer.TokString(curToken()).c_str())); - return false; + if (!parseFunctionTop()) + return false; + else + continue; } }