From 7b2e382f37fb291cc4ca7d930fba165a99abc4d6 Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Mon, 27 Nov 2023 16:20:57 +0100 Subject: [PATCH] Add better error reporting in the TokenState --- btparser/lexer.cpp | 28 ++++++++++++++++------------ btparser/lexer.h | 36 +++++++++++++++++++++++++++--------- btparser/typesparser.cpp | 2 +- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/btparser/lexer.cpp b/btparser/lexer.cpp index 71514ec..ff2ba19 100644 --- a/btparser/lexer.cpp +++ b/btparser/lexer.cpp @@ -57,7 +57,7 @@ bool Lexer::DoLexing(std::vector & tokens, std::string & error) mState.Token = token; if(token == tok_error) { - error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mError.c_str()); + error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mState.ErrorMessage.c_str()); return false; } tokens.push_back(mState); @@ -148,7 +148,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex) if(mLastChar == '\r' || mLastChar == '\n') return reportError("unexpected newline in character literal (2)"); if(mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\') - mLastChar = mLastChar; + ; else if(mLastChar == 'a') mLastChar = '\a'; else if(mLastChar == 'b') @@ -215,7 +215,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex) if(mLastChar == '\r' || mLastChar == '\n') return reportError("unexpected newline in string literal (2)"); if(mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\') - mLastChar = mLastChar; + ; else if(mLastChar == 'a') mLastChar = '\a'; else if(mLastChar == 'b') @@ -303,7 +303,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex) auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 16); if(error) return reportError(StringUtils::sprintf("convertNumber failed (%s) on hexadecimal number", error)); - mIsHexNumberVal = true; + mState.IsHexNumber = true; return tok_number; } @@ -318,7 +318,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex) auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 10); if(error) return reportError(StringUtils::sprintf("convertNumber failed (%s) on decimal number", error)); - mIsHexNumberVal = false; + mState.IsHexNumber = false; return tok_number; } @@ -395,7 +395,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex) Lexer::Token Lexer::reportError(const std::string & error) { - mError = error; + mState.ErrorMessage = error; return tok_error; } @@ -414,16 +414,20 @@ void Lexer::resetLexerState() mInput.clear(); mInput.reserve(1024 * 1024); mIndex = 0; - mError.clear(); mWarnings.clear(); clearReserve(mState.IdentifierStr); - mIsHexNumberVal = false; clearReserve(mState.StringLit); clearReserve(mNumStr, 16); mLastChar = ' '; mState.Clear(); } +std::unordered_map Lexer::mKeywordMap; +std::unordered_map Lexer::mReverseTokenMap; +std::unordered_map Lexer::mOpTripleMap; +std::unordered_map Lexer::mOpDoubleMap; +std::unordered_map Lexer::mOpSingleMap; + void Lexer::setupTokenMaps() { //setup keyword map @@ -461,11 +465,11 @@ std::string Lexer::TokString(const TokenState & ts) case tok_eof: return "tok_eof"; case tok_error: - return StringUtils::sprintf("error(line %d, col %d, \"%s\")", ts.CurLine + 1, ts.LineIndex, mError.c_str()); + return StringUtils::sprintf("error(line %d, col %d, \"%s\")", ts.CurLine + 1, ts.LineIndex, ts.ErrorMessage.c_str()); case tok_identifier: return ts.IdentifierStr; case tok_number: - return StringUtils::sprintf(mIsHexNumberVal ? "0x%llX" : "%llu", ts.NumberVal); + return StringUtils::sprintf(ts.IsHexNumber ? "0x%llX" : "%llu", ts.NumberVal); case tok_stringlit: return StringUtils::sprintf("\"%s\"", StringUtils::Escape(ts.StringLit).c_str()); case tok_charlit: @@ -491,11 +495,11 @@ std::string Lexer::TokString(Token tok) case tok_eof: return "tok_eof"; case tok_error: - return StringUtils::sprintf("error(line %d, col %d, \"%s\")", mState.CurLine + 1, mState.LineIndex, mError.c_str()); + return StringUtils::sprintf("error(line %d, col %d, \"%s\")", mState.CurLine + 1, mState.LineIndex, mState.ErrorMessage.c_str()); case tok_identifier: return mState.IdentifierStr; case tok_number: - return StringUtils::sprintf(mIsHexNumberVal ? "0x%llX" : "%llu", mState.NumberVal); + return StringUtils::sprintf(mState.IsHexNumber ? "0x%llX" : "%llu", mState.NumberVal); case tok_stringlit: return StringUtils::sprintf("\"%s\"", StringUtils::Escape(mState.StringLit).c_str()); case tok_charlit: diff --git a/btparser/lexer.h b/btparser/lexer.h index 96f4acc..c79094e 100644 --- a/btparser/lexer.h +++ b/btparser/lexer.h @@ -41,8 +41,10 @@ public: Token Token = tok_eof; std::string IdentifierStr; //tok_identifier uint64_t NumberVal = 0; //tok_number + bool IsHexNumber = false; //tok_number std::string StringLit; //tok_stringlit char CharLit = '\0'; //tok_charlit + std::string ErrorMessage; //tok_error size_t CurLine = 0; size_t LineIndex = 0; @@ -61,8 +63,24 @@ public: { IdentifierStr.clear(); NumberVal = 0; + IsHexNumber = false; StringLit.clear(); CharLit = '\0'; + ErrorMessage.clear(); + } + + void Throw(const std::string& reason) const + { + std::string message; + message += "["; + message += std::to_string(CurLine + 1); + message += ":"; + message += std::to_string(LineIndex); + message += " "; + message += reason; + message += "] "; + message += Lexer::TokString(*this); + throw std::runtime_error(message); } }; @@ -72,25 +90,25 @@ public: bool DoLexing(std::vector & tokens, std::string & error); bool Test(const std::function & lexEnum, bool output = true); std::string TokString(Token tok); - std::string TokString(const TokenState & ts); + static std::string TokString(const TokenState & ts); private: TokenState mState; std::vector mWarnings; - std::string mError; + //std::string mError; std::vector mInput; size_t mIndex = 0; - bool mIsHexNumberVal = false; std::string mNumStr; int mLastChar = ' '; - std::unordered_map mKeywordMap; - std::unordered_map mReverseTokenMap; - std::unordered_map mOpTripleMap; - std::unordered_map mOpDoubleMap; - std::unordered_map mOpSingleMap; + + static std::unordered_map mKeywordMap; + static std::unordered_map mReverseTokenMap; + static std::unordered_map mOpTripleMap; + static std::unordered_map mOpDoubleMap; + static std::unordered_map mOpSingleMap; void resetLexerState(); - void setupTokenMaps(); + static void setupTokenMaps(); Token reportError(const std::string & error); void reportWarning(const std::string & warning); int peekChar(size_t distance = 0); diff --git a/btparser/typesparser.cpp b/btparser/typesparser.cpp index 8f691d1..fde5ced 100644 --- a/btparser/typesparser.cpp +++ b/btparser/typesparser.cpp @@ -901,7 +901,7 @@ struct Parser } else { - __debugbreak(); + t.Throw("unsupported token in typedef"); } } eatSemic();