diff --git a/btparser/btparser.vcxproj b/btparser/btparser.vcxproj
index f8d4fef..e72d0c2 100644
--- a/btparser/btparser.vcxproj
+++ b/btparser/btparser.vcxproj
@@ -20,6 +20,7 @@
+
@@ -28,6 +29,7 @@
+
diff --git a/btparser/btparser.vcxproj.filters b/btparser/btparser.vcxproj.filters
index b220af6..19271c0 100644
--- a/btparser/btparser.vcxproj.filters
+++ b/btparser/btparser.vcxproj.filters
@@ -24,6 +24,9 @@
Source Files
+
+ Source Files
+
@@ -47,5 +50,8 @@
Header Files
+
+ Header Files
+
\ No newline at end of file
diff --git a/btparser/lexer.cpp b/btparser/lexer.cpp
new file mode 100644
index 0000000..a0ba1ce
--- /dev/null
+++ b/btparser/lexer.cpp
@@ -0,0 +1,499 @@
+#include "lexer.h"
+#include "stringutils.h"
+#include
+#include "filehelper.h"
+
+#define MAKE_OP_TRIPLE(ch1, ch2, ch3) (ch3 << 16 | ch2 << 8 | ch1)
+#define MAKE_OP_DOUBLE(ch1, ch2) (ch2 << 8 | ch1)
+#define MAKE_OP_SINGLE(ch1) (ch1)
+#define DEFAULT_STRING_BUFFER 65536
+
+//Empties str and then requests capacity for at least `reserve` characters,
+//so that later appends do not need to grow the buffer for typical input.
+static void clearReserve(std::string & str, size_t reserve = DEFAULT_STRING_BUFFER)
+{
+    str.erase();
+    str.reserve(reserve);
+}
+
+//Appends the single character ch (which may be '\0') to the end of str.
+static void appendCh(std::string & str, char ch)
+{
+    str.push_back(ch);
+}
+
+//Parses the whole of str as an unsigned integer in the given radix.
+//
+//The parsed value is always written to result. Returns nullptr on success,
+//otherwise a static string describing why the text was rejected:
+//  "not a number"                - no characters could be converted
+//  "does not fit"                - the value overflows unsigned long long
+//  "str not completely consumed" - trailing characters follow the number
+static const char* convertNumber(const char* str, uint64_t & result, int radix)
+{
+    char* stop = nullptr;
+    errno = 0; //strtoull only sets errno on failure, so clear it first
+    const unsigned long long parsed = strtoull(str, &stop, radix);
+    const int parseErrno = errno;
+    result = parsed;
+
+    if (stop == str && parsed == 0)
+        return "not a number";
+    if (parseErrno != 0 && parsed == ULLONG_MAX)
+        return "does not fit";
+    return *stop ? "str not completely consumed" : nullptr;
+}
+
+//Constructs a lexer and fills the keyword/operator lookup tables once, so
+//that they are available to every later lexing run on this object.
+Lexer::Lexer()
+{
+    this->setupTokenMaps();
+}
+
+//Resets all lexer state and then loads the contents of filename into mInput.
+//Returns the result of FileHelper::ReadAllData. The state is reset even when
+//reading fails.
+bool Lexer::ReadInputFile(const std::string & filename)
+{
+    resetLexerState();
+    const bool loaded = FileHelper::ReadAllData(filename, mInput);
+    return loaded;
+}
+
+//Lexes the loaded input until the end of the file.
+//
+//A copy of the token state (type, payload and position) is appended to tokens
+//for every token read; tokens is not cleared first and tok_eof itself is not
+//appended. On a lexing error, error receives "line L, col C: message" and
+//false is returned; tokens appended before the error are left in place.
+bool Lexer::DoLexing(std::vector<TokenState> & tokens, std::string & error)
+{
+    for (;;)
+    {
+        const auto token = getToken();
+        mState.Token = token;
+        if (token == tok_eof)
+            break;
+        if (token == tok_error)
+        {
+            //CurLine and LineIndex are size_t; cast them so the variadic
+            //arguments match the conversion specifiers exactly.
+            error = StringUtils::sprintf("line %llu, col %llu: %s",
+                                         static_cast<unsigned long long>(mState.CurLine + 1),
+                                         static_cast<unsigned long long>(mState.LineIndex),
+                                         mError.c_str());
+            return false;
+        }
+        tokens.push_back(mState);
+    }
+    return true;
+}
+
+//Lexes the loaded input and, when output is true, streams a textual dump of
+//the tokens to lexEnum.
+//
+//The dump starts with "1: " and every token is followed by a space. Whenever
+//the lexer has moved to a new line, "\nN: " is emitted for each line that was
+//passed before the token text. After lexing, each collected warning is sent
+//as "\nwarning: <text>". When output is false the input is still lexed but
+//lexEnum is never called.
+//
+//Returns true if lexing reached the end of the file, false if it stopped on
+//an error.
+bool Lexer::Test(const std::function<void(const std::string &)> & lexEnum, bool output)
+{
+    size_t line = 0;
+    if (output)
+        lexEnum("1: ");
+    Token tok;
+    std::string toks;
+    clearReserve(toks);
+    do
+    {
+        tok = getToken();
+        if (!output)
+            continue; //jumps to the loop condition below
+        toks.clear();
+        while (line < mState.CurLine)
+        {
+            line++;
+            //std::to_string avoids printing a size_t through "%d"
+            toks.append("\n");
+            toks.append(std::to_string(line + 1));
+            toks.append(": ");
+        }
+        toks.append(tokString(tok));
+        toks.push_back(' ');
+        lexEnum(toks);
+    } while (tok != tok_eof && tok != tok_error);
+    //The loop only exits on tok_eof or tok_error, so no further "did not
+    //finish" check is needed here.
+    if (output)
+    {
+        for (const auto & warning : mWarnings)
+            lexEnum("\nwarning: " + warning);
+    }
+    return tok != tok_error;
+}
+
+//Reads the next token from the input and returns its type.
+//
+//The payload of the token is stored in mState (IdentifierStr, NumberVal,
+//StringLit or CharLit) and mIsHexNumberVal records whether a number was
+//written with the "0x" prefix. Whitespace, backslash-newline continuations
+//and comments are skipped. On failure the message is stored via reportError()
+//and tok_error is returned; tok_eof is returned once the input is exhausted.
+Lexer::Token Lexer::getToken()
+{
+    //Decodes the escape sequence following a backslash inside a character or
+    //string literal. On entry mLastChar is the backslash; on success mLastChar
+    //holds the decoded character and true is returned. On failure the error
+    //has been reported and false is returned. literalKind ("character literal"
+    //or "string literal") is only used to build the error messages.
+    auto decodeEscape = [this](const char* literalKind) -> bool
+    {
+        nextChar();
+        if (mLastChar == EOF)
+        {
+            reportError(StringUtils::sprintf("unexpected end of file in %s (2)", literalKind));
+            return false;
+        }
+        if (mLastChar == '\r' || mLastChar == '\n')
+        {
+            reportError(StringUtils::sprintf("unexpected newline in %s (2)", literalKind));
+            return false;
+        }
+        switch (mLastChar)
+        {
+        case '\'':
+        case '\"':
+        case '?':
+        case '\\':
+            break; //these characters stand for themselves
+        case 'a':
+            mLastChar = '\a';
+            break;
+        case 'b':
+            mLastChar = '\b';
+            break;
+        case 'f':
+            mLastChar = '\f';
+            break;
+        case 'n':
+            mLastChar = '\n';
+            break;
+        case 'r':
+            mLastChar = '\r';
+            break;
+        case 't':
+            mLastChar = '\t';
+            break;
+        case 'v':
+            mLastChar = '\v';
+            break;
+        case '0':
+            mLastChar = '\0';
+            break;
+        case 'x': //\xHH (exactly two hex digits)
+        {
+            const auto ch1 = nextChar();
+            const auto ch2 = nextChar();
+            if (!isxdigit(ch1) || !isxdigit(ch2))
+            {
+                reportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in %s", ch1, ch2, literalKind));
+                return false;
+            }
+            char byteStr[3] = "";
+            byteStr[0] = static_cast<char>(ch1);
+            byteStr[1] = static_cast<char>(ch2);
+            uint64_t hexData;
+            auto error = convertNumber(byteStr, hexData, 16);
+            if (error)
+            {
+                reportError(StringUtils::sprintf("convertNumber failed (%s) for hex sequence \"\\x%c%c\" in %s", error, ch1, ch2, literalKind));
+                return false;
+            }
+            mLastChar = static_cast<int>(hexData & 0xFF);
+            break;
+        }
+        default:
+            reportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in %s", mLastChar, literalKind));
+            return false;
+        }
+        return true;
+    };
+
+    //skip whitespace
+    while (isspace(mLastChar))
+    {
+        if (mLastChar == '\n')
+            signalNewLine();
+        nextChar();
+    }
+
+    //skip \\[\r\n]
+    if (mLastChar == '\\' && (peekChar() == '\r' || peekChar() == '\n'))
+    {
+        nextChar();
+        return getToken();
+    }
+
+    //character literal
+    if (mLastChar == '\'')
+    {
+        std::string charLit;
+        while (true)
+        {
+            nextChar();
+            if (mLastChar == EOF) //end of file
+                return reportError("unexpected end of file in character literal (1)");
+            if (mLastChar == '\r' || mLastChar == '\n')
+                return reportError("unexpected newline in character literal (1)");
+            if (mLastChar == '\'') //end of character literal
+            {
+                if (charLit.length() != 1)
+                    return reportError(StringUtils::sprintf("invalid character literal '%s'", charLit.c_str()));
+                mState.CharLit = charLit[0];
+                nextChar();
+                return tok_charlit;
+            }
+            if (mLastChar == '\\' && !decodeEscape("character literal")) //escape sequence
+                return tok_error;
+            charLit += mLastChar;
+        }
+    }
+
+    //string literal
+    if (mLastChar == '\"')
+    {
+        mState.StringLit.clear();
+        while (true)
+        {
+            nextChar();
+            if (mLastChar == EOF) //end of file
+                return reportError("unexpected end of file in string literal (1)");
+            if (mLastChar == '\r' || mLastChar == '\n')
+                return reportError("unexpected newline in string literal (1)");
+            if (mLastChar == '\"') //end of string literal
+            {
+                nextChar();
+                return tok_stringlit;
+            }
+            if (mLastChar == '\\' && !decodeEscape("string literal")) //escape sequence
+                return tok_error;
+            appendCh(mState.StringLit, mLastChar);
+        }
+    }
+
+    //identifier/keyword
+    if (isalpha(mLastChar) || mLastChar == '_') //[a-zA-Z_]
+    {
+        mState.IdentifierStr = mLastChar;
+        nextChar();
+        while (isalnum(mLastChar) || mLastChar == '_') //[0-9a-zA-Z_]
+        {
+            appendCh(mState.IdentifierStr, mLastChar);
+            nextChar();
+        }
+
+        //keywords
+        auto found = mKeywordMap.find(mState.IdentifierStr);
+        if (found != mKeywordMap.end())
+            return found->second;
+
+        return tok_identifier;
+    }
+
+    //hex numbers
+    if (mLastChar == '0' && peekChar() == 'x') //0x
+    {
+        nextChar(); //consume the 'x'
+        mNumStr.clear();
+
+        while (isxdigit(nextChar())) //[0-9a-fA-F]*
+            appendCh(mNumStr, mLastChar);
+
+        if (!mNumStr.length()) //check for error condition
+            return reportError("no hex digits after \"0x\" prefix");
+
+        auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 16);
+        if (error)
+            return reportError(StringUtils::sprintf("convertNumber failed (%s) on hexadecimal number", error));
+        mIsHexNumberVal = true;
+        return tok_number;
+    }
+    if (isdigit(mLastChar)) //[0-9]
+    {
+        mNumStr = mLastChar;
+
+        while (isdigit(nextChar())) //[0-9]*
+            mNumStr += mLastChar;
+
+        auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 10);
+        if (error)
+            return reportError(StringUtils::sprintf("convertNumber failed (%s) on decimal number", error));
+        mIsHexNumberVal = false;
+        return tok_number;
+    }
+
+    //comments
+    if (mLastChar == '/' && peekChar() == '/') //line comment
+    {
+        //stop on the newline itself; the whitespace loop of the recursive
+        //call below accounts for it
+        do
+        {
+            nextChar();
+        } while (mLastChar != EOF && mLastChar != '\n');
+
+        return getToken(); //interpret the next line
+    }
+    if (mLastChar == '/' && peekChar() == '*') //block comment
+    {
+        //consume the opening '*' first, otherwise "/*/" would be mistaken
+        //for a complete comment (the '*' would also serve as the closer)
+        nextChar();
+        do
+        {
+            if (mLastChar == '\n')
+                signalNewLine();
+            nextChar();
+        } while (!(mLastChar == EOF || (mLastChar == '*' && peekChar() == '/')));
+
+        if (mLastChar == EOF) //unexpected end of file
+        {
+            mState.LineIndex++;
+            return reportError("unexpected end of file in block comment");
+        }
+
+        nextChar(); //now at the closing '/'
+        nextChar(); //first character after the comment
+        return getToken(); //get the next non-comment token
+    }
+
+    //operators (longest match first)
+    auto opFound = mOpTripleMap.find(MAKE_OP_TRIPLE(mLastChar, peekChar(), peekChar(1)));
+    if (opFound != mOpTripleMap.end())
+    {
+        nextChar();
+        nextChar();
+        nextChar();
+        return opFound->second;
+    }
+    opFound = mOpDoubleMap.find(MAKE_OP_DOUBLE(mLastChar, peekChar()));
+    if (opFound != mOpDoubleMap.end())
+    {
+        nextChar();
+        nextChar();
+        return opFound->second;
+    }
+    opFound = mOpSingleMap.find(MAKE_OP_SINGLE(mLastChar));
+    if (opFound != mOpSingleMap.end())
+    {
+        nextChar();
+        return opFound->second;
+    }
+
+    //end of file
+    if (mLastChar == EOF)
+        return tok_eof;
+
+    //unknown character
+    return reportError(StringUtils::sprintf("unexpected character \'%c\'", mLastChar));
+}
+
+//Stores error as the current error message (replacing any previous one) and
+//returns tok_error, so callers can write `return reportError(...)`.
+Lexer::Token Lexer::reportError(const std::string & error)
+{
+    mError.assign(error);
+    return tok_error;
+}
+
+//Reads one character with readChar(), remembers it in mLastChar and returns
+//it (EOF at the end of the input).
+int Lexer::nextChar()
+{
+    mLastChar = readChar();
+    return mLastChar;
+}
+
+//Records a non-fatal warning; warnings accumulate in mWarnings until the
+//lexer state is reset.
+void Lexer::reportWarning(const std::string & warning)
+{
+    mWarnings.emplace_back(warning);
+}
+
+//Returns the lexer to its initial state: empty input, no diagnostics, empty
+//token payloads and the position reset to line 0, column 0. Buffers are
+//pre-reserved so that lexing a typical file does not reallocate.
+void Lexer::resetLexerState()
+{
+    //input buffer and read cursor
+    mInput.clear();
+    mInput.reserve(1024 * 1024);
+    mIndex = 0;
+    mLastChar = ' '; //a space makes the first getToken() start by reading
+
+    //diagnostics
+    mError.clear();
+    mWarnings.clear();
+
+    //token payloads
+    clearReserve(mState.IdentifierStr);
+    clearReserve(mState.StringLit);
+    clearReserve(mNumStr, 16);
+    mState.NumberVal = 0;
+    mState.CharLit = '\0';
+    mIsHexNumberVal = false;
+
+    //position
+    mState.CurLine = 0;
+    mState.LineIndex = 0;
+}
+
+//Fills the lookup tables used by getToken() and tokString().
+//
+//"keywords.h" and "operators.h" are included several times, each time with
+//different definitions of the DEF_* macros, so the same lists that declare
+//the tok_* enumerators in lexer.h also populate the maps here. (Presumably
+//those headers consist only of DEF_KEYWORD / DEF_OP_* invocations - they are
+//not part of this file.)
+void Lexer::setupTokenMaps()
+{
+ //setup keyword map
+ //maps the keyword spelling (#keyword) to its tok_<keyword> enumerator
+#define DEF_KEYWORD(keyword) mKeywordMap[#keyword] = tok_##keyword;
+#include "keywords.h"
+#undef DEF_KEYWORD
+
+ //setup token maps
+ //the key packs the operator characters into one int with MAKE_OP_*
+ //(ch1 in the low byte); getToken() builds the same key from the input
+#define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) mOpTripleMap[MAKE_OP_TRIPLE(ch1, ch2, ch3)] = tok_##enumval;
+#define DEF_OP_DOUBLE(enumval, ch1, ch2) mOpDoubleMap[MAKE_OP_DOUBLE(ch1, ch2)] = tok_##enumval;
+#define DEF_OP_SINGLE(enumval, ch1) mOpSingleMap[MAKE_OP_SINGLE(ch1)] = tok_##enumval;
+#include "operators.h"
+#undef DEF_OP_TRIPLE
+#undef DEF_OP_DOUBLE
+#undef DEF_OP_SINGLE
+
+ //setup reverse token maps
+ //token -> source spelling, used by tokString() for keywords and operators
+#define DEF_KEYWORD(keyword) mReverseTokenMap[tok_##keyword] = #keyword;
+#include "keywords.h"
+#undef DEF_KEYWORD
+
+ //operator spellings are rebuilt from their individual characters
+#define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) mReverseTokenMap[tok_##enumval] = std::string({ch1, ch2, ch3});
+#define DEF_OP_DOUBLE(enumval, ch1, ch2) mReverseTokenMap[tok_##enumval] = std::string({ch1, ch2});
+#define DEF_OP_SINGLE(enumval, ch1) mReverseTokenMap[tok_##enumval] = std::string({ch1});
+#include "operators.h"
+#undef DEF_OP_TRIPLE
+#undef DEF_OP_DOUBLE
+#undef DEF_OP_SINGLE
+}
+
+//Returns a printable representation of tok, using the payload currently held
+//in mState:
+//  tok_eof        -> "tok_eof"
+//  tok_error      -> error(line L, col C, "message")
+//  tok_identifier -> the identifier text
+//  tok_number     -> the value, in hex ("0x...") if it was written in hex
+//  tok_stringlit  -> the escaped string in double quotes
+//  tok_charlit    -> the escaped character in single quotes
+//  anything else  -> the keyword/operator spelling, or "" if unknown
+std::string Lexer::tokString(Token tok)
+{
+    switch (tok)
+    {
+    case tok_eof:
+        return "tok_eof";
+    case tok_error:
+        //CurLine and LineIndex are size_t; cast them so the variadic
+        //arguments match the conversion specifiers exactly.
+        return StringUtils::sprintf("error(line %llu, col %llu, \"%s\")",
+                                    static_cast<unsigned long long>(mState.CurLine + 1),
+                                    static_cast<unsigned long long>(mState.LineIndex),
+                                    mError.c_str());
+    case tok_identifier:
+        return mState.IdentifierStr;
+    case tok_number:
+        return StringUtils::sprintf(mIsHexNumberVal ? "0x%llX" : "%llu",
+                                    static_cast<unsigned long long>(mState.NumberVal));
+    case tok_stringlit:
+        return StringUtils::sprintf("\"%s\"", StringUtils::Escape(mState.StringLit).c_str());
+    case tok_charlit:
+    {
+        std::string s(1, mState.CharLit);
+        return StringUtils::sprintf("'%s'", StringUtils::Escape(s).c_str());
+    }
+    default:
+    {
+        auto found = mReverseTokenMap.find(tok);
+        if (found != mReverseTokenMap.end())
+            return found->second;
+        return "";
+    }
+    }
+}
+
+//Returns the character `distance` positions past the read cursor without
+//consuming anything, or EOF if that position is beyond the input.
+//'\0' bytes are never returned: each one found at or after the requested
+//position adds a warning and the search moves on to the next byte.
+int Lexer::peekChar(size_t distance)
+{
+    for (size_t pos = mIndex + distance; pos < mInput.size(); ++pos)
+    {
+        const auto ch = mInput[pos];
+        if (ch != '\0')
+            return ch;
+        reportWarning(StringUtils::sprintf("\\0 character in file data"));
+    }
+    return EOF;
+}
+
+//Consumes and returns the next character of the input, or EOF once the read
+//cursor has reached the end. Every consumed byte advances the column counter.
+//'\0' bytes are consumed but never returned: each adds a warning and the next
+//byte is read instead.
+int Lexer::readChar()
+{
+    while (mIndex != mInput.size())
+    {
+        const auto ch = mInput[mIndex++];
+        mState.LineIndex++;
+        if (ch != '\0')
+            return ch;
+        reportWarning(StringUtils::sprintf("\\0 character in file data"));
+    }
+    return EOF;
+}
+
+//Checks whether the upcoming input matches expected, comparing it character
+//by character via peekChar(). On a match the read cursor (mIndex) is advanced
+//by expected.size() and true is returned; otherwise nothing is consumed and
+//false is returned. mLastChar and the column counter are not updated here.
+bool Lexer::checkString(const std::string & expected)
+{
+    for (size_t offset = 0; offset < expected.size(); ++offset)
+    {
+        const auto ch = peekChar(offset);
+        if (ch == EOF || ch != uint8_t(expected[offset]))
+            return false;
+    }
+    mIndex += expected.size();
+    return true;
+}
+
+//Records that a newline was passed: moves to the next line and restarts the
+//column counter.
+void Lexer::signalNewLine()
+{
+    mState.LineIndex = 0;
+    ++mState.CurLine;
+}
diff --git a/btparser/lexer.h b/btparser/lexer.h
new file mode 100644
index 0000000..3e55583
--- /dev/null
+++ b/btparser/lexer.h
@@ -0,0 +1,81 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+//Lexer that turns the bytes of an input file into a stream of tokens.
+//
+//Typical use: ReadInputFile() to load a file, then either DoLexing() to
+//collect all tokens or Test() to produce a textual dump of them.
+class Lexer
+{
+public:
+    //Token types. Keyword and operator enumerators are generated from
+    //"keywords.h" and "operators.h" through the DEF_* macros.
+    enum Token
+    {
+        //status tokens
+        tok_eof = -10000,
+        tok_error,
+
+        //keywords
+#define DEF_KEYWORD(keyword) tok_##keyword,
+#include "keywords.h"
+#undef DEF_KEYWORD
+
+        //others
+        tok_identifier, //[a-zA-Z_][a-zA-Z0-9_]*
+        tok_number, //(0x[0-9a-fA-F]+)|([0-9]+)
+        tok_stringlit, //"([^\\"]|\\([\\"'?abfnrtv0]|x[0-9a-fA-F]{2}))*"
+        tok_charlit, //'([^\\]|\\([\\"'?abfnrtv0]|x[0-9a-fA-F]{2}))'
+
+        //operators
+#define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) tok_##enumval,
+#define DEF_OP_DOUBLE(enumval, ch1, ch2) tok_##enumval,
+#define DEF_OP_SINGLE(enumval, ch1) tok_##enumval,
+#include "operators.h"
+#undef DEF_OP_TRIPLE
+#undef DEF_OP_DOUBLE
+#undef DEF_OP_SINGLE
+    };
+
+    //A token together with its payload and the lexer position at the time it
+    //was produced. Only the payload field matching Token is meaningful.
+    struct TokenState
+    {
+        Token Token;
+        std::string IdentifierStr; //tok_identifier
+        uint64_t NumberVal = 0; //tok_number
+        std::string StringLit; //tok_stringlit
+        char CharLit = '\0'; //tok_charlit
+
+        size_t CurLine = 0; //zero-based line number
+        size_t LineIndex = 0; //characters consumed on the current line
+    };
+
+    explicit Lexer();
+
+    //Resets the lexer and loads filename; returns false if reading failed.
+    bool ReadInputFile(const std::string & filename);
+
+    //Appends every token of the loaded input to tokens. On a lexing error,
+    //fills error with "line L, col C: message" and returns false.
+    bool DoLexing(std::vector<TokenState> & tokens, std::string & error);
+
+    //Lexes the loaded input and, if output is true, passes a textual dump of
+    //the tokens (and any warnings) to lexEnum piece by piece. Returns false
+    //if lexing stopped on an error.
+    bool Test(const std::function<void(const std::string & line)> & lexEnum, bool output = true);
+
+private:
+    TokenState mState; //payload/position of the token being lexed
+    std::vector<std::string> mWarnings; //non-fatal diagnostics
+    std::string mError; //message of the last reported error
+    std::vector<uint8_t> mInput; //raw bytes of the loaded file
+    size_t mIndex = 0; //index of the next unread byte in mInput
+    bool mIsHexNumberVal = false; //last tok_number was written as 0x...
+    std::string mNumStr; //scratch buffer for the digits of a number
+    int mLastChar = ' '; //most recently read character, or EOF
+    std::unordered_map<std::string, Token> mKeywordMap; //keyword spelling -> token
+    std::unordered_map<Token, std::string> mReverseTokenMap; //token -> spelling
+    std::unordered_map<int, Token> mOpTripleMap; //packed 3-char operator -> token
+    std::unordered_map<int, Token> mOpDoubleMap; //packed 2-char operator -> token
+    std::unordered_map<int, Token> mOpSingleMap; //1-char operator -> token
+
+    void resetLexerState();
+    void setupTokenMaps();
+    Token reportError(const std::string & error);
+    void reportWarning(const std::string & warning);
+    std::string tokString(Token tok);
+    int peekChar(size_t distance = 0);
+    int readChar();
+    bool checkString(const std::string & expected);
+    int nextChar();
+    void signalNewLine();
+    Token getToken();
+};
\ No newline at end of file
diff --git a/btparser/main.cpp b/btparser/main.cpp
index c4f71e7..365e73c 100644
--- a/btparser/main.cpp
+++ b/btparser/main.cpp
@@ -1,567 +1,27 @@
#include
#include
-#include
-#include
-#include
-#include
-#include
-#include "filehelper.h"
-#include "stringutils.h"
#include "testfiles.h"
+#include "lexer.h"
+#include "filehelper.h"
-#define MAKE_OP_TRIPLE(ch1, ch2, ch3) (ch3 << 16 | ch2 << 8 | ch1)
-#define MAKE_OP_DOUBLE(ch1, ch2) (ch2 << 8 | ch1)
-#define MAKE_OP_SINGLE(ch1) (ch1)
-
-#define DEFAULT_STRING_BUFFER 65536
-
-using namespace std;
-
-struct Lexer
-{
- explicit Lexer()
- {
- SetupTokenMaps();
- }
-
- enum Token
- {
- //status tokens
- tok_eof = -10000,
- tok_error,
-
- //keywords
-#define DEF_KEYWORD(keyword) tok_##keyword,
-#include "keywords.h"
-#undef DEF_KEYWORD
-
- //others
- tok_identifier, //[a-zA-Z_][a-zA-Z0-9_]
- tok_number, //(0x[0-9a-fA-F]+)|([0-9]+)
- tok_stringlit, //"([^\\"]|\\([\\"'?abfnrtv0]|x[0-9a-fA-f]{2}))*"
- tok_charlit, //'([^\\]|\\([\\"'?abfnrtv0]|x[0-9a-fA-f]{2}))'
-
- //operators
-#define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) tok_##enumval,
-#define DEF_OP_DOUBLE(enumval, ch1, ch2) tok_##enumval,
-#define DEF_OP_SINGLE(enumval, ch1) tok_##enumval,
-#include "operators.h"
-#undef DEF_OP_TRIPLE
-#undef DEF_OP_DOUBLE
-#undef DEF_OP_SINGLE
- };
-
- vector Input;
- size_t Index = 0;
- string Error;
- vector Warnings;
-
- //lexer state
- string IdentifierStr;
- uint64_t NumberVal = 0;
- bool IsHexNumberVal = false;
- string StringLit;
- string NumStr;
- char CharLit = '\0';
- int LastChar = ' ';
- size_t CurLine = 0;
- size_t LineIndex = 0;
-
- static void clearReserve(string & str, size_t reserve = DEFAULT_STRING_BUFFER)
- {
- str.clear();
- str.reserve(reserve);
- }
-
- static void appendCh(string & str, char ch)
- {
- str.resize(str.size() + 1);
- str[str.size() - 1] = ch;
- }
-
- void ResetLexerState()
- {
- Input.clear();
- Input.reserve(1024 * 1024);
- Index = 0;
- Error.clear();
- Warnings.clear();
- clearReserve(IdentifierStr);
- NumberVal = 0;
- IsHexNumberVal = false;
- clearReserve(StringLit);
- clearReserve(NumStr, 16);
- CharLit = '\0';
- LastChar = ' ';
- CurLine = 0;
- LineIndex = 0;
- }
-
- unordered_map KeywordMap;
- unordered_map ReverseTokenMap;
- unordered_map OpTripleMap;
- unordered_map OpDoubleMap;
- unordered_map OpSingleMap;
-
- void SetupTokenMaps()
- {
- //setup keyword map
-#define DEF_KEYWORD(keyword) KeywordMap[#keyword] = tok_##keyword;
-#include "keywords.h"
-#undef DEF_KEYWORD
-
- //setup token maps
-#define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) OpTripleMap[MAKE_OP_TRIPLE(ch1, ch2, ch3)] = tok_##enumval;
-#define DEF_OP_DOUBLE(enumval, ch1, ch2) OpDoubleMap[MAKE_OP_DOUBLE(ch1, ch2)] = tok_##enumval;
-#define DEF_OP_SINGLE(enumval, ch1) OpSingleMap[MAKE_OP_SINGLE(ch1)] = tok_##enumval;
-#include "operators.h"
-#undef DEF_OP_TRIPLE
-#undef DEF_OP_DOUBLE
-#undef DEF_OP_SINGLE
-
- //setup reverse token maps
-#define DEF_KEYWORD(keyword) ReverseTokenMap[tok_##keyword] = #keyword;
-#include "keywords.h"
-#undef DEF_KEYWORD
-
-#define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) ReverseTokenMap[tok_##enumval] = string({ch1, ch2, ch3});
-#define DEF_OP_DOUBLE(enumval, ch1, ch2) ReverseTokenMap[tok_##enumval] = string({ch1, ch2});
-#define DEF_OP_SINGLE(enumval, ch1) ReverseTokenMap[tok_##enumval] = string({ch1});
-#include "operators.h"
-#undef DEF_OP_TRIPLE
-#undef DEF_OP_DOUBLE
-#undef DEF_OP_SINGLE
- }
-
- Token ReportError(const String & error)
- {
- Error = error;
- return tok_error;
- }
-
- void ReportWarning(const String & warning)
- {
- Warnings.push_back(warning);
- }
-
- String TokString(int tok)
- {
- switch (Token(tok))
- {
- case tok_eof: return "tok_eof";
- case tok_error: return StringUtils::sprintf("error(line %d, col %d, \"%s\")", CurLine + 1, LineIndex, Error.c_str());
- case tok_identifier: return IdentifierStr;
- case tok_number: return StringUtils::sprintf(IsHexNumberVal ? "0x%llX" : "%llu", NumberVal);
- case tok_stringlit: return StringUtils::sprintf("\"%s\"", StringUtils::Escape(StringLit).c_str());
- case tok_charlit:
- {
- String s;
- s = CharLit;
- return StringUtils::sprintf("'%s'", StringUtils::Escape(s).c_str());
- }
- default:
- {
- auto found = ReverseTokenMap.find(Token(tok));
- if (found != ReverseTokenMap.end())
- return found->second;
- return "";
- }
- }
- }
-
- int PeekChar(size_t distance = 0)
- {
- if (Index + distance >= Input.size())
- return EOF;
- auto ch = Input[Index + distance];
- if (ch == '\0')
- {
- ReportWarning(StringUtils::sprintf("\\0 character in file data"));
- return PeekChar(distance + 1);
- }
- return ch;
- }
-
- int ReadChar()
- {
- if (Index == Input.size())
- return EOF;
- auto ch = Input[Index++];
- LineIndex++;
- if (ch == '\0')
- {
- ReportWarning(StringUtils::sprintf("\\0 character in file data"));
- return ReadChar();
- }
- return ch;
- }
-
- bool CheckString(const string & expected)
- {
- for (size_t i = 0; i < expected.size(); i++)
- {
- auto ch = PeekChar(i);
- if (ch == EOF)
- return false;
- if (ch != uint8_t(expected[i]))
- return false;
- }
- Index += expected.size();
- return true;
- }
-
- int NextChar()
- {
- return LastChar = ReadChar();
- }
-
- void SignalNextLine()
- {
- CurLine++;
- LineIndex = 0;
- }
-
- static const char* ConvertNumber(const char* str, uint64_t & result, int radix)
- {
- errno = 0;
- char* end;
- result = strtoull(str, &end, radix);
- if (!result && end == str)
- return "not a number";
- if (result == ULLONG_MAX && errno)
- return "does not fit";
- if (*end)
- return "str not completely consumed";
- return nullptr;
- }
-
- int GetToken()
- {
- //skip whitespace
- while (isspace(LastChar))
- {
- if (LastChar == '\n')
- SignalNextLine();
- NextChar();
- }
-
- //skip \\[\r\n]
- if (LastChar == '\\' && (PeekChar() == '\r' || PeekChar() == '\n'))
- {
- NextChar();
- return GetToken();
- }
-
- //character literal
- if (LastChar == '\'')
- {
- string charLit;
- while (true)
- {
- NextChar();
- if (LastChar == EOF) //end of file
- return ReportError("unexpected end of file in character literal (1)");
- if (LastChar == '\r' || LastChar == '\n')
- return ReportError("unexpected newline in character literal (1)");
- if (LastChar == '\'') //end of character literal
- {
- if (charLit.length() != 1)
- return ReportError(StringUtils::sprintf("invalid character literal '%s'", charLit.c_str()));
- CharLit = charLit[0];
- NextChar();
- return tok_charlit;
- }
- if (LastChar == '\\') //escape sequence
- {
- NextChar();
- if (LastChar == EOF)
- return ReportError("unexpected end of file in character literal (2)");
- if (LastChar == '\r' || LastChar == '\n')
- return ReportError("unexpected newline in character literal (2)");
- if (LastChar == '\'' || LastChar == '\"' || LastChar == '?' || LastChar == '\\')
- LastChar = LastChar;
- else if (LastChar == 'a')
- LastChar = '\a';
- else if (LastChar == 'b')
- LastChar = '\b';
- else if (LastChar == 'f')
- LastChar = '\f';
- else if (LastChar == 'n')
- LastChar = '\n';
- else if (LastChar == 'r')
- LastChar = '\r';
- else if (LastChar == 't')
- LastChar = '\t';
- else if (LastChar == 'v')
- LastChar = '\v';
- else if (LastChar == '0')
- LastChar = '\0';
- else if (LastChar == 'x') //\xHH
- {
- auto ch1 = NextChar();
- auto ch2 = NextChar();
- if (isxdigit(ch1) && isxdigit(ch2))
- {
- char byteStr[3] = "";
- byteStr[0] = ch1;
- byteStr[1] = ch2;
- uint64_t hexData;
- auto error = ConvertNumber(byteStr, hexData, 16);
- if (error)
- return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in character literal", error, ch1, ch2));
- LastChar = hexData & 0xFF;
- }
- else
- return ReportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in character literal", ch1, ch2));
- }
- else
- return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in character literal", LastChar));
- }
- charLit += LastChar;
- }
- }
-
- //string literal
- if (LastChar == '\"')
- {
- StringLit.clear();
- while (true)
- {
- NextChar();
- if (LastChar == EOF) //end of file
- return ReportError("unexpected end of file in string literal (1)");
- if (LastChar == '\r' || LastChar == '\n')
- return ReportError("unexpected newline in string literal (1)");
- if (LastChar == '\"') //end of string literal
- {
- NextChar();
- return tok_stringlit;
- }
- if (LastChar == '\\') //escape sequence
- {
- NextChar();
- if (LastChar == EOF)
- return ReportError("unexpected end of file in string literal (2)");
- if (LastChar == '\r' || LastChar == '\n')
- return ReportError("unexpected newline in string literal (2)");
- if (LastChar == '\'' || LastChar == '\"' || LastChar == '?' || LastChar == '\\')
- LastChar = LastChar;
- else if (LastChar == 'a')
- LastChar = '\a';
- else if (LastChar == 'b')
- LastChar = '\b';
- else if (LastChar == 'f')
- LastChar = '\f';
- else if (LastChar == 'n')
- LastChar = '\n';
- else if (LastChar == 'r')
- LastChar = '\r';
- else if (LastChar == 't')
- LastChar = '\t';
- else if (LastChar == 'v')
- LastChar = '\v';
- else if (LastChar == '0')
- LastChar = '\0';
- else if (LastChar == 'x') //\xHH
- {
- auto ch1 = NextChar();
- auto ch2 = NextChar();
- if (isxdigit(ch1) && isxdigit(ch2))
- {
- char byteStr[3] = "";
- byteStr[0] = ch1;
- byteStr[1] = ch2;
- uint64_t hexData;
- auto error = ConvertNumber(byteStr, hexData, 16);
- if (error)
- return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in string literal", error, ch1, ch2));
- LastChar = hexData & 0xFF;
- }
- else
- return ReportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in string literal", ch1, ch2));
- }
- else
- return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar));
- }
- appendCh(StringLit, LastChar);
- }
- }
-
- //identifier/keyword
- if (isalpha(LastChar) || LastChar == '_') //[a-zA-Z_]
- {
- IdentifierStr = LastChar;
- NextChar();
- while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_]
- {
- appendCh(IdentifierStr, LastChar);
- NextChar();
- }
-
- //keywords
- auto found = KeywordMap.find(IdentifierStr);
- if (found != KeywordMap.end())
- return found->second;
-
- return tok_identifier;
- }
-
- //hex numbers
- if (LastChar == '0' && PeekChar() == 'x') //0x
- {
- NextChar(); //consume the 'x'
- NumStr.clear();
-
- while (isxdigit(NextChar())) //[0-9a-fA-F]*
- appendCh(NumStr, LastChar);
-
- if (!NumStr.length()) //check for error condition
- return ReportError("no hex digits after \"0x\" prefix");
-
- auto error = ConvertNumber(NumStr.c_str(), NumberVal, 16);
- if (error)
- return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on hexadecimal number", error));
- IsHexNumberVal = true;
- return tok_number;
- }
- if (isdigit(LastChar)) //[0-9]
- {
- NumStr = LastChar;
-
- while (isdigit(NextChar())) //[0-9]*
- NumStr += LastChar;
-
- auto error = ConvertNumber(NumStr.c_str(), NumberVal, 10);
- if (error)
- return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on decimal number", error));
- IsHexNumberVal = false;
- return tok_number;
- }
-
- //comments
- if (LastChar == '/' && PeekChar() == '/') //line comment
- {
- do
- {
- if (LastChar == '\n')
- SignalNextLine();
- NextChar();
- } while (!(LastChar == EOF || LastChar == '\n'));
-
- return GetToken(); //interpret the next line
- }
- if (LastChar == '/' && PeekChar() == '*') //block comment
- {
- do
- {
- if (LastChar == '\n')
- SignalNextLine();
- NextChar();
- } while (!(LastChar == EOF || LastChar == '*' && PeekChar() == '/'));
-
- if (LastChar == EOF) //unexpected end of file
- {
- LineIndex++;
- return ReportError("unexpected end of file in block comment");
- }
-
- NextChar();
- NextChar();
- return GetToken(); //get the next non-comment token
- }
-
- //operators
- auto opFound = OpTripleMap.find(MAKE_OP_TRIPLE(LastChar, PeekChar(), PeekChar(1)));
- if (opFound != OpTripleMap.end())
- {
- NextChar();
- NextChar();
- NextChar();
- return opFound->second;
- }
- opFound = OpDoubleMap.find(MAKE_OP_DOUBLE(LastChar, PeekChar()));
- if (opFound != OpDoubleMap.end())
- {
- NextChar();
- NextChar();
- return opFound->second;
- }
- opFound = OpSingleMap.find(MAKE_OP_SINGLE(LastChar));
- if (opFound != OpSingleMap.end())
- {
- NextChar();
- return opFound->second;
- }
-
- //end of file
- if (LastChar == EOF)
- return tok_eof;
-
- //unknown character
- return ReportError(StringUtils::sprintf("unexpected character \'%c\'", LastChar));
- }
-
- bool ReadInputFile(const string & filename)
- {
- ResetLexerState();
- return FileHelper::ReadAllData(filename, Input);
- }
-
- bool TestLex(const function & lexEnum, bool output = true)
- {
- size_t line = 0;
- if (output)
- lexEnum("1: ");
- int tok;
- string toks;
- clearReserve(toks);
- char newlineText[128] = "";
- do
- {
- tok = GetToken();
- if (!output)
- continue;
- toks.clear();
- while (line < CurLine)
- {
- line++;
- sprintf_s(newlineText, "\n%d: ", line + 1);
- toks.append(newlineText);
- }
- toks.append(TokString(tok));
- appendCh(toks, ' ');
- lexEnum(toks);
- } while (tok != tok_eof && tok != tok_error);
- if (tok != tok_error && tok != tok_eof)
- tok = ReportError("lexer did not finish at the end of the file");
- for (const auto & warning : Warnings)
- if (output)
- lexEnum("\nwarning: " + warning);
- return tok != tok_error;
- }
-};
-
-bool TestLexer(Lexer & lexer, const string & filename)
+bool TestLexer(Lexer & lexer, const std::string & filename)
{
if (!lexer.ReadInputFile("tests\\" + filename))
{
printf("failed to read \"%s\"\n", filename.c_str());
return false;
}
- string actual;
- Lexer::clearReserve(actual);
- auto success = lexer.TestLex([&](const string & line)
+ std::string actual;
+ actual.reserve(65536);
+ auto success = lexer.Test([&](const std::string & line)
{
actual.append(line);
});
- string expected;
- if (FileHelper::ReadAllText("tests\\exp_lex\\" + filename, expected))
+ std::string expected;
+ if (FileHelper::ReadAllText("tests\\exp_lex\\" + filename, expected) && expected == actual)
{
- if (expected == actual)
- {
- printf("lexer test for \"%s\" success!\n", filename.c_str());
- return true;
- }
+ printf("lexer test for \"%s\" success!\n", filename.c_str());
+ return true;
}
if (success)
return true;
@@ -571,14 +31,14 @@ bool TestLexer(Lexer & lexer, const string & filename)
return false;
}
-bool DebugLexer(Lexer & lexer, const string & filename, bool output)
+bool DebugLexer(Lexer & lexer, const std::string & filename, bool output)
{
if (!lexer.ReadInputFile("tests\\" + filename))
{
printf("failed to read \"%s\"\n", filename.c_str());
return false;
}
- auto success = lexer.TestLex([](const string & line)
+ auto success = lexer.Test([](const std::string & line)
{
printf("%s", line.c_str());
}, output);
@@ -587,16 +47,16 @@ bool DebugLexer(Lexer & lexer, const string & filename, bool output)
return success;
}
-void GenerateExpected(Lexer & lexer, const string & filename)
+void GenerateExpected(Lexer & lexer, const std::string & filename)
{
if (!lexer.ReadInputFile("tests\\" + filename))
{
printf("failed to read \"%s\"\n", filename.c_str());
return;
}
- string actual;
- Lexer::clearReserve(actual);
- lexer.TestLex([&](const string & line)
+ std::string actual;
+ actual.reserve(65536);
+ lexer.Test([&](const std::string & line)
{
actual.append(line);
});