Add better error reporting in the TokenState

This commit is contained in:
Duncan Ogilvie 2023-11-27 16:20:57 +01:00
parent 962b648019
commit 7b2e382f37
3 changed files with 44 additions and 22 deletions

View File

@ -57,7 +57,7 @@ bool Lexer::DoLexing(std::vector<TokenState> & tokens, std::string & error)
mState.Token = token;
if(token == tok_error)
{
error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mError.c_str());
error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mState.ErrorMessage.c_str());
return false;
}
tokens.push_back(mState);
@ -148,7 +148,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
if(mLastChar == '\r' || mLastChar == '\n')
return reportError("unexpected newline in character literal (2)");
if(mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\')
mLastChar = mLastChar;
;
else if(mLastChar == 'a')
mLastChar = '\a';
else if(mLastChar == 'b')
@ -215,7 +215,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
if(mLastChar == '\r' || mLastChar == '\n')
return reportError("unexpected newline in string literal (2)");
if(mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\')
mLastChar = mLastChar;
;
else if(mLastChar == 'a')
mLastChar = '\a';
else if(mLastChar == 'b')
@ -303,7 +303,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 16);
if(error)
return reportError(StringUtils::sprintf("convertNumber failed (%s) on hexadecimal number", error));
mIsHexNumberVal = true;
mState.IsHexNumber = true;
return tok_number;
}
@ -318,7 +318,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 10);
if(error)
return reportError(StringUtils::sprintf("convertNumber failed (%s) on decimal number", error));
mIsHexNumberVal = false;
mState.IsHexNumber = false;
return tok_number;
}
@ -395,7 +395,7 @@ Lexer::Token Lexer::getToken(size_t & tokenLineIndex)
// Stores the given message as the lexer's current error and returns
// tok_error, so a call site can bail out with `return reportError(...)`.
//
// error: human-readable description of the lexing failure.
// Returns: always tok_error.
Lexer::Token Lexer::reportError(const std::string & error)
{
mError = error;
mState.ErrorMessage = error;
return tok_error;
}
@ -414,16 +414,20 @@ void Lexer::resetLexerState()
mInput.clear();
mInput.reserve(1024 * 1024);
mIndex = 0;
mError.clear();
mWarnings.clear();
clearReserve(mState.IdentifierStr);
mIsHexNumberVal = false;
clearReserve(mState.StringLit);
clearReserve(mNumStr, 16);
mLastChar = ' ';
mState.Clear();
}
// Out-of-class definitions of the Lexer's static token lookup tables.
// Being static, one copy of each table is shared by every Lexer instance.
// NOTE(review): presumably filled in by setupTokenMaps() - confirm that it
// is safe to call more than once now that the tables are shared.
std::unordered_map<std::string, Lexer::Token> Lexer::mKeywordMap;
std::unordered_map<Lexer::Token, std::string> Lexer::mReverseTokenMap;
std::unordered_map<int, Lexer::Token> Lexer::mOpTripleMap;
std::unordered_map<int, Lexer::Token> Lexer::mOpDoubleMap;
std::unordered_map<int, Lexer::Token> Lexer::mOpSingleMap;
void Lexer::setupTokenMaps()
{
//setup keyword map
@ -461,11 +465,11 @@ std::string Lexer::TokString(const TokenState & ts)
case tok_eof:
return "tok_eof";
case tok_error:
return StringUtils::sprintf("error(line %d, col %d, \"%s\")", ts.CurLine + 1, ts.LineIndex, mError.c_str());
return StringUtils::sprintf("error(line %d, col %d, \"%s\")", ts.CurLine + 1, ts.LineIndex, ts.ErrorMessage.c_str());
case tok_identifier:
return ts.IdentifierStr;
case tok_number:
return StringUtils::sprintf(mIsHexNumberVal ? "0x%llX" : "%llu", ts.NumberVal);
return StringUtils::sprintf(ts.IsHexNumber ? "0x%llX" : "%llu", ts.NumberVal);
case tok_stringlit:
return StringUtils::sprintf("\"%s\"", StringUtils::Escape(ts.StringLit).c_str());
case tok_charlit:
@ -491,11 +495,11 @@ std::string Lexer::TokString(Token tok)
case tok_eof:
return "tok_eof";
case tok_error:
return StringUtils::sprintf("error(line %d, col %d, \"%s\")", mState.CurLine + 1, mState.LineIndex, mError.c_str());
return StringUtils::sprintf("error(line %d, col %d, \"%s\")", mState.CurLine + 1, mState.LineIndex, mState.ErrorMessage.c_str());
case tok_identifier:
return mState.IdentifierStr;
case tok_number:
return StringUtils::sprintf(mIsHexNumberVal ? "0x%llX" : "%llu", mState.NumberVal);
return StringUtils::sprintf(mState.IsHexNumber ? "0x%llX" : "%llu", mState.NumberVal);
case tok_stringlit:
return StringUtils::sprintf("\"%s\"", StringUtils::Escape(mState.StringLit).c_str());
case tok_charlit:

View File

@ -41,8 +41,10 @@ public:
Token Token = tok_eof;
std::string IdentifierStr; //tok_identifier
uint64_t NumberVal = 0; //tok_number
bool IsHexNumber = false; //tok_number
std::string StringLit; //tok_stringlit
char CharLit = '\0'; //tok_charlit
std::string ErrorMessage; //tok_error
size_t CurLine = 0;
size_t LineIndex = 0;
@ -61,8 +63,24 @@ public:
{
IdentifierStr.clear();
NumberVal = 0;
IsHexNumber = false;
StringLit.clear();
CharLit = '\0';
ErrorMessage.clear();
}
// Raises a std::runtime_error describing a problem at this token.
//
// The message has the form "[<line>:<col> <reason>] <token>", where <line>
// is CurLine + 1, <col> is LineIndex exactly as stored, and <token> is the
// text Lexer::TokString produces for this token state.
//
// reason: short description of what went wrong.
// Throws: std::runtime_error, always. The function never returns normally,
// which is why it is marked [[noreturn]]: callers in value-returning code
// paths do not need a dummy return after calling it.
[[noreturn]] void Throw(const std::string& reason) const
{
    // Location prefix first, so the position is visible even when the
    // reason or token text is long.
    std::string message = "[";
    message += std::to_string(CurLine + 1);
    message += ":";
    message += std::to_string(LineIndex);
    message += " ";
    message += reason;
    message += "] ";

    // Append the printable form of the offending token.
    message += Lexer::TokString(*this);

    throw std::runtime_error(message);
}
};
@ -72,25 +90,25 @@ public:
bool DoLexing(std::vector<TokenState> & tokens, std::string & error);
bool Test(const std::function<void(const std::string & line)> & lexEnum, bool output = true);
std::string TokString(Token tok);
std::string TokString(const TokenState & ts);
static std::string TokString(const TokenState & ts);
private:
TokenState mState;
std::vector<std::string> mWarnings;
std::string mError;
//std::string mError;
std::vector<uint8_t> mInput;
size_t mIndex = 0;
bool mIsHexNumberVal = false;
std::string mNumStr;
int mLastChar = ' ';
std::unordered_map<std::string, Token> mKeywordMap;
std::unordered_map<Token, std::string> mReverseTokenMap;
std::unordered_map<int, Token> mOpTripleMap;
std::unordered_map<int, Token> mOpDoubleMap;
std::unordered_map<int, Token> mOpSingleMap;
static std::unordered_map<std::string, Token> mKeywordMap;
static std::unordered_map<Token, std::string> mReverseTokenMap;
static std::unordered_map<int, Token> mOpTripleMap;
static std::unordered_map<int, Token> mOpDoubleMap;
static std::unordered_map<int, Token> mOpSingleMap;
void resetLexerState();
void setupTokenMaps();
static void setupTokenMaps();
Token reportError(const std::string & error);
void reportWarning(const std::string & warning);
int peekChar(size_t distance = 0);

View File

@ -901,7 +901,7 @@ struct Parser
}
else
{
__debugbreak();
t.Throw("unsupported token in typedef");
}
}
eatSemic();