Add better error reporting in the TokenState

Duncan Ogilvie 2023-11-27 16:20:57 +01:00
parent 962b648019
commit 7b2e382f37
3 changed files with 44 additions and 22 deletions

View File

@@ -57,7 +57,7 @@ bool Lexer::DoLexing(std::vector<TokenState> & tokens, std::string & error)
 mState.Token = token;
 if(token == tok_error)
 {
-    error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mError.c_str());
+    error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mState.ErrorMessage.c_str());
     return false;
 }
 tokens.push_back(mState);
@@ -148,7 +148,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
 if(mLastChar == '\r' || mLastChar == '\n')
     return reportError("unexpected newline in character literal (2)");
 if(mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\')
-    mLastChar = mLastChar;
+    ;
 else if(mLastChar == 'a')
     mLastChar = '\a';
 else if(mLastChar == 'b')
@@ -215,7 +215,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
 if(mLastChar == '\r' || mLastChar == '\n')
     return reportError("unexpected newline in string literal (2)");
 if(mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\')
-    mLastChar = mLastChar;
+    ;
 else if(mLastChar == 'a')
     mLastChar = '\a';
 else if(mLastChar == 'b')
@@ -303,7 +303,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
 auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 16);
 if(error)
     return reportError(StringUtils::sprintf("convertNumber failed (%s) on hexadecimal number", error));
-mIsHexNumberVal = true;
+mState.IsHexNumber = true;
 return tok_number;
 }
@@ -318,7 +318,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
 auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 10);
 if(error)
     return reportError(StringUtils::sprintf("convertNumber failed (%s) on decimal number", error));
-mIsHexNumberVal = false;
+mState.IsHexNumber = false;
 return tok_number;
 }
@@ -395,7 +395,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
 Lexer::Token Lexer::reportError(const std::string & error)
 {
-    mError = error;
+    mState.ErrorMessage = error;
     return tok_error;
 }
@@ -414,16 +414,20 @@ void Lexer::resetLexerState()
     mInput.clear();
     mInput.reserve(1024 * 1024);
     mIndex = 0;
-    mError.clear();
     mWarnings.clear();
     clearReserve(mState.IdentifierStr);
-    mIsHexNumberVal = false;
     clearReserve(mState.StringLit);
     clearReserve(mNumStr, 16);
     mLastChar = ' ';
     mState.Clear();
 }
 
+std::unordered_map<std::string, Lexer::Token> Lexer::mKeywordMap;
+std::unordered_map<Lexer::Token, std::string> Lexer::mReverseTokenMap;
+std::unordered_map<int, Lexer::Token> Lexer::mOpTripleMap;
+std::unordered_map<int, Lexer::Token> Lexer::mOpDoubleMap;
+std::unordered_map<int, Lexer::Token> Lexer::mOpSingleMap;
+
 void Lexer::setupTokenMaps()
 {
     //setup keyword map
@@ -461,11 +465,11 @@ std::string Lexer::TokString(const TokenState & ts)
 case tok_eof:
     return "tok_eof";
 case tok_error:
-    return StringUtils::sprintf("error(line %d, col %d, \"%s\")", ts.CurLine + 1, ts.LineIndex, mError.c_str());
+    return StringUtils::sprintf("error(line %d, col %d, \"%s\")", ts.CurLine + 1, ts.LineIndex, ts.ErrorMessage.c_str());
 case tok_identifier:
     return ts.IdentifierStr;
 case tok_number:
-    return StringUtils::sprintf(mIsHexNumberVal ? "0x%llX" : "%llu", ts.NumberVal);
+    return StringUtils::sprintf(ts.IsHexNumber ? "0x%llX" : "%llu", ts.NumberVal);
 case tok_stringlit:
     return StringUtils::sprintf("\"%s\"", StringUtils::Escape(ts.StringLit).c_str());
 case tok_charlit:
@@ -491,11 +495,11 @@ std::string Lexer::TokString(Token tok)
 case tok_eof:
     return "tok_eof";
 case tok_error:
-    return StringUtils::sprintf("error(line %d, col %d, \"%s\")", mState.CurLine + 1, mState.LineIndex, mError.c_str());
+    return StringUtils::sprintf("error(line %d, col %d, \"%s\")", mState.CurLine + 1, mState.LineIndex, mState.ErrorMessage.c_str());
 case tok_identifier:
     return mState.IdentifierStr;
 case tok_number:
-    return StringUtils::sprintf(mIsHexNumberVal ? "0x%llX" : "%llu", mState.NumberVal);
+    return StringUtils::sprintf(mState.IsHexNumber ? "0x%llX" : "%llu", mState.NumberVal);
 case tok_stringlit:
     return StringUtils::sprintf("\"%s\"", StringUtils::Escape(mState.StringLit).c_str());
 case tok_charlit:
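
Note: after this change the lexer no longer keeps a separate mError/mIsHexNumberVal pair; the error text and the hex flag travel inside the TokenState that DoLexing fills in. A minimal usage sketch under that reading, assuming TokenState is the nested struct shown in the header below (written Lexer::TokenState here); the input-loading call is hypothetical and not part of this diff:

    #include <cstdio>
    #include <string>
    #include <vector>

    void lexExample(Lexer & lexer)
    {
        // lexer.SetInputData("typedef int foo;"); // hypothetical input helper, not shown in this commit
        std::vector<Lexer::TokenState> tokens;
        std::string error;
        if(!lexer.DoLexing(tokens, error))
        {
            // error is formatted as "line %d, col %d: %s" from the failing token's ErrorMessage
            printf("lexing failed: %s\n", error.c_str());
        }
    }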

View File

@@ -41,8 +41,10 @@ public:
 Token Token = tok_eof;
 std::string IdentifierStr; //tok_identifier
 uint64_t NumberVal = 0; //tok_number
+bool IsHexNumber = false; //tok_number
 std::string StringLit; //tok_stringlit
 char CharLit = '\0'; //tok_charlit
+std::string ErrorMessage; //tok_error
 size_t CurLine = 0;
 size_t LineIndex = 0;
@@ -61,8 +63,24 @@ public:
 {
     IdentifierStr.clear();
     NumberVal = 0;
+    IsHexNumber = false;
     StringLit.clear();
     CharLit = '\0';
+    ErrorMessage.clear();
+}
+
+void Throw(const std::string& reason) const
+{
+    std::string message;
+    message += "[";
+    message += std::to_string(CurLine + 1);
+    message += ":";
+    message += std::to_string(LineIndex);
+    message += " ";
+    message += reason;
+    message += "] ";
+    message += Lexer::TokString(*this);
+    throw std::runtime_error(message);
 }
 };
@@ -72,25 +90,25 @@ public:
 bool DoLexing(std::vector<TokenState> & tokens, std::string & error);
 bool Test(const std::function<void(const std::string & line)> & lexEnum, bool output = true);
 std::string TokString(Token tok);
-std::string TokString(const TokenState & ts);
+static std::string TokString(const TokenState & ts);
 
 private:
 TokenState mState;
 std::vector<std::string> mWarnings;
-std::string mError;
+//std::string mError;
 std::vector<uint8_t> mInput;
 size_t mIndex = 0;
-bool mIsHexNumberVal = false;
 std::string mNumStr;
 int mLastChar = ' ';
-std::unordered_map<std::string, Token> mKeywordMap;
-std::unordered_map<Token, std::string> mReverseTokenMap;
-std::unordered_map<int, Token> mOpTripleMap;
-std::unordered_map<int, Token> mOpDoubleMap;
-std::unordered_map<int, Token> mOpSingleMap;
+
+static std::unordered_map<std::string, Token> mKeywordMap;
+static std::unordered_map<Token, std::string> mReverseTokenMap;
+static std::unordered_map<int, Token> mOpTripleMap;
+static std::unordered_map<int, Token> mOpDoubleMap;
+static std::unordered_map<int, Token> mOpSingleMap;
 
 void resetLexerState();
-void setupTokenMaps();
+static void setupTokenMaps();
 Token reportError(const std::string & error);
 void reportWarning(const std::string & warning);
 int peekChar(size_t distance = 0);
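
For reference, TokenState::Throw builds its message as "[<line+1>:<col> <reason>] " followed by the token's string form, then raises std::runtime_error. A small standalone sketch that mirrors that concatenation; the literal values are made up for illustration:

    #include <stdexcept>
    #include <string>

    void throwLikeTokenState()
    {
        size_t CurLine = 11, LineIndex = 5;  // 0-based line and column
        std::string reason = "unsupported token in typedef";
        std::string tokenText = "myType";    // stands in for Lexer::TokString(*this)
        std::string message;
        message += "[";
        message += std::to_string(CurLine + 1);
        message += ":";
        message += std::to_string(LineIndex);
        message += " ";
        message += reason;
        message += "] ";
        message += tokenText;
        // message == "[12:5 unsupported token in typedef] myType"
        throw std::runtime_error(message);
    }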

View File

@@ -901,7 +901,7 @@ struct Parser
 }
 else
 {
-    __debugbreak();
+    t.Throw("unsupported token in typedef");
 }
 }
 eatSemic();
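
With __debugbreak() replaced by t.Throw(...), an unsupported construct in a typedef now surfaces as a catchable std::runtime_error carrying the token position, instead of a breakpoint trap that typically crashes the process when no debugger is attached. A hedged sketch of a call site; Parser, its ParseFile entry point, and the file name are assumptions for illustration, not part of this diff:

    #include <cstdio>
    #include <stdexcept>

    void parseExample(Parser & parser)
    {
        try
        {
            parser.ParseFile("types.bt"); // hypothetical entry point, not shown in this commit
        }
        catch(const std::runtime_error & x)
        {
            // e.g. "[12:5 unsupported token in typedef] myType"
            fprintf(stderr, "parse error: %s\n", x.what());
        }
    }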