refactor (moved Lexer to separate file)

2016-06-05 19:24:49 +02:00 · 2016-06-05 19:24:49 +02:00 · 547b65b284
parent 8365d3444b
commit 547b65b284
5 changed files with 604 additions and 556 deletions
--- a/btparser/btparser.vcxproj
+++ b/btparser/btparser.vcxproj
@ -20,6 +20,7 @@
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="filehelper.cpp" />
    <ClCompile Include="lexer.cpp" />
    <ClCompile Include="main.cpp" />
    <ClCompile Include="stringutils.cpp" />
  </ItemGroup>
@ -28,6 +29,7 @@
    <ClInclude Include="filehelper.h" />
    <ClInclude Include="handle.h" />
    <ClInclude Include="keywords.h" />
    <ClInclude Include="lexer.h" />
    <ClInclude Include="operators.h" />
    <ClInclude Include="stringutils.h" />
    <ClInclude Include="testfiles.h" />
--- a/btparser/btparser.vcxproj.filters
+++ b/btparser/btparser.vcxproj.filters
@ -24,6 +24,9 @@
    <ClCompile Include="stringutils.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="lexer.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="filehelper.h">
@ -47,5 +50,8 @@
    <ClInclude Include="operators.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="lexer.h">
      <Filter>Header Files</Filter>
    </ClInclude>
  </ItemGroup>
 </Project>
--- a/btparser/lexer.cpp
+++ b/btparser/lexer.cpp
@ -0,0 +1,499 @@
 #include "lexer.h"
 #include "stringutils.h"
 #include <cctype>
 #include "filehelper.h"
 //Pack up to three operator characters into a single int key: the first character occupies the
 //lowest byte, the second the next byte, the third the byte above that.
 //Every argument is parenthesized so that expression arguments are not regrouped by operator precedence.
 #define MAKE_OP_TRIPLE(ch1, ch2, ch3) ((ch3) << 16 | (ch2) << 8 | (ch1))
 #define MAKE_OP_DOUBLE(ch1, ch2) ((ch2) << 8 | (ch1))
 #define MAKE_OP_SINGLE(ch1) (ch1)
 #define DEFAULT_STRING_BUFFER 65536
 //Empties str and then requests a capacity of at least `reserve` characters
 //(DEFAULT_STRING_BUFFER when the caller does not pass one).
 static void clearReserve(std::string & str, size_t reserve = DEFAULT_STRING_BUFFER)
 {
    str.erase(); //no arguments: removes every character, same as clear()
    str.reserve(reserve);
 }
 //Appends the single character ch (which may be '\0') to the end of str.
 static void appendCh(std::string & str, char ch)
 {
    str.push_back(ch);
 }
 //Converts the NUL-terminated string str to an unsigned 64-bit value in the given radix.
 //Returns nullptr on success (value stored in result) or a static string describing the failure:
 //  "not a number"                - nothing could be converted
 //  "does not fit"                - the value overflows unsigned long long
 //  "str not completely consumed" - trailing characters follow the number
 static const char* convertNumber(const char* str, uint64_t & result, int radix)
 {
    //strtoull skips leading whitespace and accepts a sign (so "-1" would silently become ULLONG_MAX);
    //require the text to start with a digit/letter so only plain numbers are accepted
    if (!isalnum(static_cast<unsigned char>(*str)))
    {
        result = 0;
        return "not a number";
    }
    errno = 0;
    char* end = nullptr;
    result = strtoull(str, &end, radix);
    if (!result && end == str)
        return "not a number";
    if (result == ULLONG_MAX && errno == ERANGE)
        return "does not fit";
    if (*end)
        return "str not completely consumed";
    return nullptr;
 }
 //Constructs a lexer and fills the keyword/operator lookup tables once via setupTokenMaps().
 Lexer::Lexer()
 {
    setupTokenMaps();
 }
 //Resets all lexer state and then loads the contents of `filename` into mInput.
 //Returns the result of FileHelper::ReadAllData.
 bool Lexer::ReadInputFile(const std::string & filename)
 {
    resetLexerState();
    const bool loaded = FileHelper::ReadAllData(filename, mInput);
    return loaded;
 }
 bool Lexer::DoLexing(std::vector<TokenState> & tokens, std::string & error)
 {
    while (true)
    {
        auto token = getToken();
        mState.Token = token;
        if (token == tok_eof)
            break;
        if (token == tok_error)
        {
            error = StringUtils::sprintf("line %d, col %d: %s", mState.CurLine + 1, mState.LineIndex, mError.c_str());
            return false;
        }
        tokens.push_back(mState);
    }
    return true;
 }
 //Runs the lexer over the whole input for testing purposes.
 //When `output` is true, the textual form of every token is passed to lexEnum, each new source
 //line is prefixed with "\n<line number>: ", and all collected warnings are emitted at the end.
 //Returns true when lexing reached the end of the file without an error token.
 bool Lexer::Test(const std::function<void(const std::string & line)> & lexEnum, bool output)
 {
    size_t line = 0;
    if (output)
        lexEnum("1: ");
    Token tok;
    std::string toks;
    clearReserve(toks);
    char newlineText[128] = "";
    do
    {
        tok = getToken();
        if (!output)
            continue; //still evaluates the loop condition below
        toks.clear();
        //emit a line-number prefix for every line passed since the previous token
        while (line < mState.CurLine)
        {
            line++;
            //line is a size_t; convert explicitly so the argument matches the format specifier
            sprintf_s(newlineText, "\n%llu: ", static_cast<unsigned long long>(line + 1));
            toks.append(newlineText);
        }
        toks.append(tokString(tok));
        appendCh(toks, ' ');
        lexEnum(toks);
    } while (tok != tok_eof && tok != tok_error);
    //the loop only exits on tok_eof or tok_error, so no further "did not finish" check is needed
    if (output)
        for (const auto & warning : mWarnings)
            lexEnum("\nwarning: " + warning);
    return tok != tok_error;
 }
 //Scans the input starting at mLastChar and returns the next token.
 //Whitespace, backslash-newline continuations and comments are skipped. Token payloads are stored
 //in mState (IdentifierStr, NumberVal, StringLit, CharLit). On failure the message is recorded via
 //reportError() and tok_error is returned; tok_eof is returned once the input is exhausted.
 Lexer::Token Lexer::getToken()
 {
    //skip whitespace
    while (isspace(mLastChar))
    {
        if (mLastChar == '\n')
            signalNewLine();
        nextChar();
    }
    //skip \\[\r\n]
    if (mLastChar == '\\' && (peekChar() == '\r' || peekChar() == '\n'))
    {
        nextChar();
        return getToken();
    }
    //character literal
    if (mLastChar == '\'')
    {
        std::string charLit;
        while (true)
        {
            nextChar();
            if (mLastChar == EOF) //end of file
                return reportError("unexpected end of file in character literal (1)");
            if (mLastChar == '\r' || mLastChar == '\n')
                return reportError("unexpected newline in character literal (1)");
            if (mLastChar == '\'') //end of character literal
            {
                if (charLit.length() != 1)
                    return reportError(StringUtils::sprintf("invalid character literal '%s'", charLit.c_str()));
                mState.CharLit = charLit[0];
                nextChar();
                return tok_charlit;
            }
            if (mLastChar == '\\') //escape sequence
            {
                nextChar();
                if (mLastChar == EOF)
                    return reportError("unexpected end of file in character literal (2)");
                if (mLastChar == '\r' || mLastChar == '\n')
                    return reportError("unexpected newline in character literal (2)");
                if (mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\')
                {
                    //the escaped character stands for itself
                }
                else if (mLastChar == 'a')
                    mLastChar = '\a';
                else if (mLastChar == 'b')
                    mLastChar = '\b';
                else if (mLastChar == 'f')
                    mLastChar = '\f';
                else if (mLastChar == 'n')
                    mLastChar = '\n';
                else if (mLastChar == 'r')
                    mLastChar = '\r';
                else if (mLastChar == 't')
                    mLastChar = '\t';
                else if (mLastChar == 'v')
                    mLastChar = '\v';
                else if (mLastChar == '0')
                    mLastChar = '\0';
                else if (mLastChar == 'x') //\xHH
                {
                    auto ch1 = nextChar();
                    auto ch2 = nextChar();
                    if (isxdigit(ch1) && isxdigit(ch2))
                    {
                        char byteStr[3] = "";
                        byteStr[0] = ch1;
                        byteStr[1] = ch2;
                        uint64_t hexData;
                        auto error = convertNumber(byteStr, hexData, 16);
                        if (error)
                            return reportError(StringUtils::sprintf("convertNumber failed (%s) for hex sequence \"\\x%c%c\" in character literal", error, ch1, ch2));
                        mLastChar = hexData & 0xFF;
                    }
                    else
                        return reportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in character literal", ch1, ch2));
                }
                else
                    return reportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in character literal", mLastChar));
            }
            charLit += mLastChar;
        }
    }
    //string literal
    if (mLastChar == '\"')
    {
        mState.StringLit.clear();
        while (true)
        {
            nextChar();
            if (mLastChar == EOF) //end of file
                return reportError("unexpected end of file in string literal (1)");
            if (mLastChar == '\r' || mLastChar == '\n')
                return reportError("unexpected newline in string literal (1)");
            if (mLastChar == '\"') //end of string literal
            {
                nextChar();
                return tok_stringlit;
            }
            if (mLastChar == '\\') //escape sequence
            {
                nextChar();
                if (mLastChar == EOF)
                    return reportError("unexpected end of file in string literal (2)");
                if (mLastChar == '\r' || mLastChar == '\n')
                    return reportError("unexpected newline in string literal (2)");
                if (mLastChar == '\'' || mLastChar == '\"' || mLastChar == '?' || mLastChar == '\\')
                {
                    //the escaped character stands for itself
                }
                else if (mLastChar == 'a')
                    mLastChar = '\a';
                else if (mLastChar == 'b')
                    mLastChar = '\b';
                else if (mLastChar == 'f')
                    mLastChar = '\f';
                else if (mLastChar == 'n')
                    mLastChar = '\n';
                else if (mLastChar == 'r')
                    mLastChar = '\r';
                else if (mLastChar == 't')
                    mLastChar = '\t';
                else if (mLastChar == 'v')
                    mLastChar = '\v';
                else if (mLastChar == '0')
                    mLastChar = '\0';
                else if (mLastChar == 'x') //\xHH
                {
                    auto ch1 = nextChar();
                    auto ch2 = nextChar();
                    if (isxdigit(ch1) && isxdigit(ch2))
                    {
                        char byteStr[3] = "";
                        byteStr[0] = ch1;
                        byteStr[1] = ch2;
                        uint64_t hexData;
                        auto error = convertNumber(byteStr, hexData, 16);
                        if (error)
                            return reportError(StringUtils::sprintf("convertNumber failed (%s) for hex sequence \"\\x%c%c\" in string literal", error, ch1, ch2));
                        mLastChar = hexData & 0xFF;
                    }
                    else
                        return reportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in string literal", ch1, ch2));
                }
                else
                    return reportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", mLastChar));
            }
            appendCh(mState.StringLit, mLastChar);
        }
    }
    //identifier/keyword
    if (isalpha(mLastChar) || mLastChar == '_') //[a-zA-Z_]
    {
        mState.IdentifierStr = mLastChar;
        nextChar();
        while (isalnum(mLastChar) || mLastChar == '_') //[0-9a-zA-Z_]
        {
            appendCh(mState.IdentifierStr, mLastChar);
            nextChar();
        }
        //keywords
        auto found = mKeywordMap.find(mState.IdentifierStr);
        if (found != mKeywordMap.end())
            return found->second;
        return tok_identifier;
    }
    //hex numbers
    if (mLastChar == '0' && peekChar() == 'x') //0x
    {
        nextChar(); //consume the 'x'
        mNumStr.clear();
        while (isxdigit(nextChar())) //[0-9a-fA-F]*
            appendCh(mNumStr, mLastChar);
        if (!mNumStr.length()) //check for error condition
            return reportError("no hex digits after \"0x\" prefix");
        auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 16);
        if (error)
            return reportError(StringUtils::sprintf("convertNumber failed (%s) on hexadecimal number", error));
        mIsHexNumberVal = true;
        return tok_number;
    }
    //decimal numbers
    if (isdigit(mLastChar)) //[0-9]
    {
        mNumStr = mLastChar;
        while (isdigit(nextChar())) //[0-9]*
            mNumStr += mLastChar;
        auto error = convertNumber(mNumStr.c_str(), mState.NumberVal, 10);
        if (error)
            return reportError(StringUtils::sprintf("convertNumber failed (%s) on decimal number", error));
        mIsHexNumberVal = false;
        return tok_number;
    }
    //comments
    if (mLastChar == '/' && peekChar() == '/') //line comment
    {
        do
        {
            if (mLastChar == '\n')
                signalNewLine();
            nextChar();
        } while (!(mLastChar == EOF || mLastChar == '\n'));
        return getToken(); //interpret the next line
    }
    if (mLastChar == '/' && peekChar() == '*') //block comment
    {
        //consume the opening '*' first, otherwise it could be paired with a directly
        //following '/' and "/*/" would be mistaken for a complete comment
        nextChar();
        do
        {
            if (mLastChar == '\n')
                signalNewLine();
            nextChar();
        } while (!(mLastChar == EOF || (mLastChar == '*' && peekChar() == '/')));
        if (mLastChar == EOF) //unexpected end of file
        {
            mState.LineIndex++;
            return reportError("unexpected end of file in block comment");
        }
        nextChar(); //now on the closing '/'
        nextChar(); //first character after the comment
        return getToken(); //get the next non-comment token
    }
    //operators (longest match first)
    auto opFound = mOpTripleMap.find(MAKE_OP_TRIPLE(mLastChar, peekChar(), peekChar(1)));
    if (opFound != mOpTripleMap.end())
    {
        nextChar();
        nextChar();
        nextChar();
        return opFound->second;
    }
    opFound = mOpDoubleMap.find(MAKE_OP_DOUBLE(mLastChar, peekChar()));
    if (opFound != mOpDoubleMap.end())
    {
        nextChar();
        nextChar();
        return opFound->second;
    }
    opFound = mOpSingleMap.find(MAKE_OP_SINGLE(mLastChar));
    if (opFound != mOpSingleMap.end())
    {
        nextChar();
        return opFound->second;
    }
    //end of file
    if (mLastChar == EOF)
        return tok_eof;
    //unknown character
    return reportError(StringUtils::sprintf("unexpected character \'%c\'", mLastChar));
 }
 //Stores `error` as the current error message (mError) and returns tok_error,
 //so callers can write `return reportError(...)`.
 Lexer::Token Lexer::reportError(const std::string & error)
 {
    mError = error;
    return tok_error;
 }
 //Reads the next character through readChar(), remembers it in mLastChar and returns it.
 int Lexer::nextChar()
 {
    mLastChar = readChar();
    return mLastChar;
 }
 //Adds `warning` to the end of the list of collected warnings (mWarnings).
 void Lexer::reportWarning(const std::string & warning)
 {
    mWarnings.emplace_back(warning);
 }
 //Puts the lexer back into its initial state: empty input, no diagnostics,
 //cleared token payloads and the position reset to the start of line 0.
 void Lexer::resetLexerState()
 {
    //input buffer and read position
    mInput.clear();
    mInput.reserve(1024 * 1024);
    mIndex = 0;
    mLastChar = ' ';

    //diagnostics
    mError.clear();
    mWarnings.clear();

    //token payloads
    clearReserve(mState.IdentifierStr);
    clearReserve(mState.StringLit);
    clearReserve(mNumStr, 16);
    mState.NumberVal = 0;
    mState.CharLit = '\0';
    mIsHexNumberVal = false;

    //source position
    mState.CurLine = 0;
    mState.LineIndex = 0;
 }
 //Fills the lookup tables used by getToken() and tokString().
 //keywords.h and operators.h are included several times with the DEF_* macros defined differently
 //each time, so every entry in those headers expands to one map insertion (the headers themselves
 //are not visible here; presumably they consist solely of DEF_* invocations).
 void Lexer::setupTokenMaps()
 {
    //setup keyword map
 #define DEF_KEYWORD(keyword) mKeywordMap[#keyword] = tok_##keyword;
 #include "keywords.h"
 #undef DEF_KEYWORD
    //setup token maps
    //operator characters are packed into an int key with the MAKE_OP_* macros
 #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) mOpTripleMap[MAKE_OP_TRIPLE(ch1, ch2, ch3)] = tok_##enumval;
 #define DEF_OP_DOUBLE(enumval, ch1, ch2) mOpDoubleMap[MAKE_OP_DOUBLE(ch1, ch2)] = tok_##enumval;
 #define DEF_OP_SINGLE(enumval, ch1) mOpSingleMap[MAKE_OP_SINGLE(ch1)] = tok_##enumval;
 #include "operators.h"
 #undef DEF_OP_TRIPLE
 #undef DEF_OP_DOUBLE
 #undef DEF_OP_SINGLE
    //setup reverse token maps
    //token -> source text, used by tokString() for keywords and operators
 #define DEF_KEYWORD(keyword) mReverseTokenMap[tok_##keyword] = #keyword;
 #include "keywords.h"
 #undef DEF_KEYWORD
 #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) mReverseTokenMap[tok_##enumval] = std::string({ch1, ch2, ch3});
 #define DEF_OP_DOUBLE(enumval, ch1, ch2) mReverseTokenMap[tok_##enumval] = std::string({ch1, ch2});
 #define DEF_OP_SINGLE(enumval, ch1) mReverseTokenMap[tok_##enumval] = std::string({ch1});
 #include "operators.h"
 #undef DEF_OP_TRIPLE
 #undef DEF_OP_DOUBLE
 #undef DEF_OP_SINGLE
 }
 //Returns a printable representation of `tok`, using the current lexer state for its payload:
 //identifiers print their text, numbers print in the radix they were written in, literals are
 //escaped and quoted, keywords/operators are looked up in mReverseTokenMap.
 std::string Lexer::tokString(Token tok)
 {
    switch (tok)
    {
    case tok_eof:
        return "tok_eof";
    case tok_error:
        //CurLine/LineIndex are size_t; convert explicitly so the arguments match the %d specifiers
        return StringUtils::sprintf("error(line %d, col %d, \"%s\")", static_cast<int>(mState.CurLine + 1), static_cast<int>(mState.LineIndex), mError.c_str());
    case tok_identifier:
        return mState.IdentifierStr;
    case tok_number:
        //uint64_t is not necessarily unsigned long long; convert so it matches %llX / %llu
        return StringUtils::sprintf(mIsHexNumberVal ? "0x%llX" : "%llu", static_cast<unsigned long long>(mState.NumberVal));
    case tok_stringlit:
        return StringUtils::sprintf("\"%s\"", StringUtils::Escape(mState.StringLit).c_str());
    case tok_charlit:
        {
            std::string s;
            s = mState.CharLit;
            return StringUtils::sprintf("'%s'", StringUtils::Escape(s).c_str());
        }
    default:
        {
            //keywords and operators
            auto found = mReverseTokenMap.find(tok);
            if (found != mReverseTokenMap.end())
                return found->second;
            return "<UNKNOWN TOKEN>";
        }
    }
 }
 //Looks ahead without consuming input: returns the first byte at or after mIndex + distance
 //that is not '\0', or EOF when the end of mInput is reached first.
 //A warning is recorded for every '\0' byte that is skipped.
 int Lexer::peekChar(size_t distance)
 {
    for (size_t pos = mIndex + distance; pos < mInput.size(); pos++)
    {
        const auto candidate = mInput[pos];
        if (candidate != '\0')
            return candidate;
        reportWarning(StringUtils::sprintf("\\0 character in file data"));
    }
    return EOF;
 }
 //Consumes input and returns the next byte that is not '\0', or EOF once mInput is exhausted.
 //Every consumed byte (skipped '\0' bytes included) advances mState.LineIndex, and a warning is
 //recorded for each skipped '\0'.
 int Lexer::readChar()
 {
    while (mIndex != mInput.size())
    {
        const auto current = mInput[mIndex++];
        mState.LineIndex++;
        if (current != '\0')
            return current;
        reportWarning(StringUtils::sprintf("\\0 character in file data"));
    }
    return EOF;
 }
 //Tests whether the upcoming input matches `expected` byte for byte (compared through peekChar).
 //On a full match mIndex is advanced by expected.size() and true is returned;
 //otherwise nothing is consumed and false is returned.
 bool Lexer::checkString(const std::string & expected)
 {
    const size_t length = expected.size();
    for (size_t offset = 0; offset < length; offset++)
    {
        const int actual = peekChar(offset);
        if (actual == EOF || actual != uint8_t(expected[offset]))
            return false;
    }
    mIndex += length;
    return true;
 }
 //Records the start of a new source line: the column counter restarts at 0 and the line counter advances.
 void Lexer::signalNewLine()
 {
    mState.LineIndex = 0;
    ++mState.CurLine;
 }
--- a/btparser/lexer.h
+++ b/btparser/lexer.h
@ -0,0 +1,81 @@
 #pragma once
 #include <cstdint>
 #include <vector>
 #include <unordered_map>
 #include <functional>
 //Tokenizer that reads a whole file into memory and splits it into tokens.
 //Keywords and operators are not listed here: they come from keywords.h / operators.h, which are
 //included with the DEF_* macros defined so that each entry becomes an enumerator.
 class Lexer
 {
 public:
    //All token kinds the lexer can produce.
    enum Token
    {
        //status tokens
        tok_eof = -10000,
        tok_error,
        //keywords
 #define DEF_KEYWORD(keyword) tok_##keyword,
 #include "keywords.h"
 #undef DEF_KEYWORD
        //others
        tok_identifier, //[a-zA-Z_][a-zA-Z0-9_]*
        tok_number, //(0x[0-9a-fA-F]+)|([0-9]+)
        tok_stringlit, //"([^\\"]|\\([\\"'?abfnrtv0]|x[0-9a-fA-F]{2}))*"
        tok_charlit, //'([^\\]|\\([\\"'?abfnrtv0]|x[0-9a-fA-F]{2}))'
        //operators
 #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) tok_##enumval,
 #define DEF_OP_DOUBLE(enumval, ch1, ch2) tok_##enumval,
 #define DEF_OP_SINGLE(enumval, ch1) tok_##enumval,
 #include "operators.h"
 #undef DEF_OP_TRIPLE
 #undef DEF_OP_DOUBLE
 #undef DEF_OP_SINGLE
    };
    //Snapshot of one lexed token: its kind, the payload for that kind and the lexer position.
    struct TokenState
    {
        Token Token; //kind of the token (has no default initializer; DoLexing assigns it before storing)
        std::string IdentifierStr; //tok_identifier
        uint64_t NumberVal = 0; //tok_number
        std::string StringLit; //tok_stringlit
        char CharLit = '\0'; //tok_charlit
        size_t CurLine = 0; //zero-based line counter (reported as CurLine + 1)
        size_t LineIndex = 0; //number of characters read on the current line
    };
    explicit Lexer();
    //Resets the lexer and loads `filename` into the input buffer.
    bool ReadInputFile(const std::string & filename);
    //Lexes the input into `tokens`; returns false and fills `error` on the first lexing error.
    bool DoLexing(std::vector<TokenState> & tokens, std::string & error);
    //Lexes the input and, when `output` is true, passes the printable tokens and warnings to lexEnum.
    bool Test(const std::function<void(const std::string & line)> & lexEnum, bool output = true);
 private:
    TokenState mState; //payload and position of the token currently being lexed
    std::vector<std::string> mWarnings; //warnings collected via reportWarning
    std::string mError; //message of the last error set via reportError
    std::vector<uint8_t> mInput; //raw file contents
    size_t mIndex = 0; //index of the next unread byte in mInput
    bool mIsHexNumberVal = false; //whether the last number token was written in hexadecimal
    std::string mNumStr; //scratch buffer for the digits of the number being lexed
    int mLastChar = ' '; //most recently read character (or EOF)
    std::unordered_map<std::string, Token> mKeywordMap; //keyword text -> token
    std::unordered_map<Token, std::string> mReverseTokenMap; //keyword/operator token -> text
    std::unordered_map<int, Token> mOpTripleMap; //packed three-character operator -> token
    std::unordered_map<int, Token> mOpDoubleMap; //packed two-character operator -> token
    std::unordered_map<int, Token> mOpSingleMap; //single-character operator -> token
    void resetLexerState();
    void setupTokenMaps();
    Token reportError(const std::string & error);
    void reportWarning(const std::string & warning);
    std::string tokString(Token tok);
    int peekChar(size_t distance = 0);
    int readChar();
    bool checkString(const std::string & expected);
    int nextChar();
    void signalNewLine();
    Token getToken();
 };
--- a/btparser/main.cpp
+++ b/btparser/main.cpp
@ -1,568 +1,28 @@
 #include <windows.h>
 #include <stdio.h>
 #include <string>
 #include <stdint.h>
 #include <unordered_map>
 #include <functional>
 #include <vector>
 #include "filehelper.h"
 #include "stringutils.h"
 #include "testfiles.h"
 #include "lexer.h"
 #include "filehelper.h"
-#define MAKE_OP_TRIPLE(ch1, ch2, ch3) (ch3 << 16 | ch2 << 8 | ch1)
+bool TestLexer(Lexer & lexer, const std::string & filename)
 #define MAKE_OP_DOUBLE(ch1, ch2) (ch2 << 8 | ch1)
 #define MAKE_OP_SINGLE(ch1) (ch1)
 #define DEFAULT_STRING_BUFFER 65536
 using namespace std;
 struct Lexer
 {
    explicit Lexer()
    {
        SetupTokenMaps();
    }
    enum Token
    {
        //status tokens
        tok_eof = -10000,
        tok_error,
        //keywords
 #define DEF_KEYWORD(keyword) tok_##keyword,
 #include "keywords.h"
 #undef DEF_KEYWORD
        //others
        tok_identifier, //[a-zA-Z_][a-zA-Z0-9_]
        tok_number, //(0x[0-9a-fA-F]+)|([0-9]+)
        tok_stringlit, //"([^\\"]|\\([\\"'?abfnrtv0]|x[0-9a-fA-f]{2}))*"
        tok_charlit, //'([^\\]|\\([\\"'?abfnrtv0]|x[0-9a-fA-f]{2}))'
        //operators
 #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) tok_##enumval,
 #define DEF_OP_DOUBLE(enumval, ch1, ch2) tok_##enumval,
 #define DEF_OP_SINGLE(enumval, ch1) tok_##enumval,
 #include "operators.h"
 #undef DEF_OP_TRIPLE
 #undef DEF_OP_DOUBLE
 #undef DEF_OP_SINGLE
    };
    vector<uint8_t> Input;
    size_t Index = 0;
    string Error;
    vector<String> Warnings;
    //lexer state
    string IdentifierStr;
    uint64_t NumberVal = 0;
    bool IsHexNumberVal = false;
    string StringLit;
    string NumStr;
    char CharLit = '\0';
    int LastChar = ' ';
    size_t CurLine = 0;
    size_t LineIndex = 0;
    static void clearReserve(string & str, size_t reserve = DEFAULT_STRING_BUFFER)
    {
        str.clear();
        str.reserve(reserve);
    }
    static void appendCh(string & str, char ch)
    {
        str.resize(str.size() + 1);
        str[str.size() - 1] = ch;
    }
    void ResetLexerState()
    {
        Input.clear();
        Input.reserve(1024 * 1024);
        Index = 0;
        Error.clear();
        Warnings.clear();
        clearReserve(IdentifierStr);
        NumberVal = 0;
        IsHexNumberVal = false;
        clearReserve(StringLit);
        clearReserve(NumStr, 16);
        CharLit = '\0';
        LastChar = ' ';
        CurLine = 0;
        LineIndex = 0;
    }
    unordered_map<string, Token> KeywordMap;
    unordered_map<Token, string> ReverseTokenMap;
    unordered_map<int, Token> OpTripleMap;
    unordered_map<int, Token> OpDoubleMap;
    unordered_map<int, Token> OpSingleMap;
    void SetupTokenMaps()
    {
        //setup keyword map
 #define DEF_KEYWORD(keyword) KeywordMap[#keyword] = tok_##keyword;
 #include "keywords.h"
 #undef DEF_KEYWORD
        //setup token maps
 #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) OpTripleMap[MAKE_OP_TRIPLE(ch1, ch2, ch3)] = tok_##enumval;
 #define DEF_OP_DOUBLE(enumval, ch1, ch2) OpDoubleMap[MAKE_OP_DOUBLE(ch1, ch2)] = tok_##enumval;
 #define DEF_OP_SINGLE(enumval, ch1) OpSingleMap[MAKE_OP_SINGLE(ch1)] = tok_##enumval;
 #include "operators.h"
 #undef DEF_OP_TRIPLE
 #undef DEF_OP_DOUBLE
 #undef DEF_OP_SINGLE
        //setup reverse token maps
 #define DEF_KEYWORD(keyword) ReverseTokenMap[tok_##keyword] = #keyword;
 #include "keywords.h"
 #undef DEF_KEYWORD
 #define DEF_OP_TRIPLE(enumval, ch1, ch2, ch3) ReverseTokenMap[tok_##enumval] = string({ch1, ch2, ch3});
 #define DEF_OP_DOUBLE(enumval, ch1, ch2) ReverseTokenMap[tok_##enumval] = string({ch1, ch2});
 #define DEF_OP_SINGLE(enumval, ch1) ReverseTokenMap[tok_##enumval] = string({ch1});
 #include "operators.h"
 #undef DEF_OP_TRIPLE
 #undef DEF_OP_DOUBLE
 #undef DEF_OP_SINGLE
    }
    Token ReportError(const String & error)
    {
        Error = error;
        return tok_error;
    }
    void ReportWarning(const String & warning)
    {
        Warnings.push_back(warning);
    }
    String TokString(int tok)
    {
        switch (Token(tok))
        {
        case tok_eof: return "tok_eof";
        case tok_error: return StringUtils::sprintf("error(line %d, col %d, \"%s\")", CurLine + 1, LineIndex, Error.c_str());
        case tok_identifier: return IdentifierStr;
        case tok_number: return StringUtils::sprintf(IsHexNumberVal ? "0x%llX" : "%llu", NumberVal);
        case tok_stringlit: return StringUtils::sprintf("\"%s\"", StringUtils::Escape(StringLit).c_str());
        case tok_charlit:
        {
            String s;
            s = CharLit;
            return StringUtils::sprintf("'%s'", StringUtils::Escape(s).c_str());
        }
        default:
        {
            auto found = ReverseTokenMap.find(Token(tok));
            if (found != ReverseTokenMap.end())
                return found->second;
            return "<INVALID TOKEN>";
        }
        }
    }
    int PeekChar(size_t distance = 0)
    {
        if (Index + distance >= Input.size())
            return EOF;
        auto ch = Input[Index + distance];
        if (ch == '\0')
        {
            ReportWarning(StringUtils::sprintf("\\0 character in file data"));
            return PeekChar(distance + 1);
        }
        return ch;
    }
    int ReadChar()
    {
        if (Index == Input.size())
            return EOF;
        auto ch = Input[Index++];
        LineIndex++;
        if (ch == '\0')
        {
            ReportWarning(StringUtils::sprintf("\\0 character in file data"));
            return ReadChar();
        }
        return ch;
    }
    bool CheckString(const string & expected)
    {
        for (size_t i = 0; i < expected.size(); i++)
        {
            auto ch = PeekChar(i);
            if (ch == EOF)
                return false;
            if (ch != uint8_t(expected[i]))
                return false;
        }
        Index += expected.size();
        return true;
    }
    int NextChar()
    {
        return LastChar = ReadChar();
    }
    void SignalNextLine()
    {
        CurLine++;
        LineIndex = 0;
    }
    static const char* ConvertNumber(const char* str, uint64_t & result, int radix)
    {
        errno = 0;
        char* end;
        result = strtoull(str, &end, radix);
        if (!result && end == str)
            return "not a number";
        if (result == ULLONG_MAX && errno)
            return "does not fit";
        if (*end)
            return "str not completely consumed";
        return nullptr;
    }
    int GetToken()
    {
        //skip whitespace
        while (isspace(LastChar))
        {
            if (LastChar == '\n')
                SignalNextLine();
            NextChar();
        }
        //skip \\[\r\n]
        if (LastChar == '\\' && (PeekChar() == '\r' || PeekChar() == '\n'))
        {
            NextChar();
            return GetToken();
        }
        //character literal
        if (LastChar == '\'')
        {
            string charLit;
            while (true)
            {
                NextChar();
                if (LastChar == EOF) //end of file
                    return ReportError("unexpected end of file in character literal (1)");
                if (LastChar == '\r' || LastChar == '\n')
                    return ReportError("unexpected newline in character literal (1)");
                if (LastChar == '\'') //end of character literal
                {
                    if (charLit.length() != 1)
                        return ReportError(StringUtils::sprintf("invalid character literal '%s'", charLit.c_str()));
                    CharLit = charLit[0];
                    NextChar();
                    return tok_charlit;
                }
                if (LastChar == '\\') //escape sequence
                {
                    NextChar();
                    if (LastChar == EOF)
                        return ReportError("unexpected end of file in character literal (2)");
                    if (LastChar == '\r' || LastChar == '\n')
                        return ReportError("unexpected newline in character literal (2)");
                    if (LastChar == '\'' || LastChar == '\"' || LastChar == '?' || LastChar == '\\')
                        LastChar = LastChar;
                    else if (LastChar == 'a')
                        LastChar = '\a';
                    else if (LastChar == 'b')
                        LastChar = '\b';
                    else if (LastChar == 'f')
                        LastChar = '\f';
                    else if (LastChar == 'n')
                        LastChar = '\n';
                    else if (LastChar == 'r')
                        LastChar = '\r';
                    else if (LastChar == 't')
                        LastChar = '\t';
                    else if (LastChar == 'v')
                        LastChar = '\v';
                    else if (LastChar == '0')
                        LastChar = '\0';
                    else if (LastChar == 'x') //\xHH
                    {
                        auto ch1 = NextChar();
                        auto ch2 = NextChar();
                        if (isxdigit(ch1) && isxdigit(ch2))
                        {
                            char byteStr[3] = "";
                            byteStr[0] = ch1;
                            byteStr[1] = ch2;
                            uint64_t hexData;
                            auto error = ConvertNumber(byteStr, hexData, 16);
                            if (error)
                                return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in character literal", error, ch1, ch2));
                            LastChar = hexData & 0xFF;
                        }
                        else
                            return ReportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in character literal", ch1, ch2));
                    }
                    else
                        return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in character literal", LastChar));
                }
                charLit += LastChar;
            }
        }
        //string literal
        if (LastChar == '\"')
        {
            StringLit.clear();
            while (true)
            {
                NextChar();
                if (LastChar == EOF) //end of file
                    return ReportError("unexpected end of file in string literal (1)");
                if (LastChar == '\r' || LastChar == '\n')
                    return ReportError("unexpected newline in string literal (1)");
                if (LastChar == '\"') //end of string literal
                {
                    NextChar();
                    return tok_stringlit;
                }
                if (LastChar == '\\') //escape sequence
                {
                    NextChar();
                    if (LastChar == EOF)
                        return ReportError("unexpected end of file in string literal (2)");
                    if (LastChar == '\r' || LastChar == '\n')
                        return ReportError("unexpected newline in string literal (2)");
                    if (LastChar == '\'' || LastChar == '\"' || LastChar == '?' || LastChar == '\\')
                        LastChar = LastChar;
                    else if (LastChar == 'a')
                        LastChar = '\a';
                    else if (LastChar == 'b')
                        LastChar = '\b';
                    else if (LastChar == 'f')
                        LastChar = '\f';
                    else if (LastChar == 'n')
                        LastChar = '\n';
                    else if (LastChar == 'r')
                        LastChar = '\r';
                    else if (LastChar == 't')
                        LastChar = '\t';
                    else if (LastChar == 'v')
                        LastChar = '\v';
                    else if (LastChar == '0')
                        LastChar = '\0';
                    else if (LastChar == 'x') //\xHH
                    {
                        auto ch1 = NextChar();
                        auto ch2 = NextChar();
                        if (isxdigit(ch1) && isxdigit(ch2))
                        {
                            char byteStr[3] = "";
                            byteStr[0] = ch1;
                            byteStr[1] = ch2;
                            uint64_t hexData;
                            auto error = ConvertNumber(byteStr, hexData, 16);
                            if (error)
                                return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) for hex sequence \"\\x%c%c\" in string literal", error, ch1, ch2));
                            LastChar = hexData & 0xFF;
                        }
                        else
                            return ReportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in string literal", ch1, ch2));
                    }
                    else
                        return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar));
                }
                appendCh(StringLit, LastChar);
            }
        }
        //identifier/keyword
        if (isalpha(LastChar) || LastChar == '_') //[a-zA-Z_]
        {
            IdentifierStr = LastChar;
            NextChar();
            while (isalnum(LastChar) || LastChar == '_') //[0-9a-zA-Z_]
            {
                appendCh(IdentifierStr, LastChar);
                NextChar();
            }
            //keywords
            auto found = KeywordMap.find(IdentifierStr);
            if (found != KeywordMap.end())
                return found->second;
            return tok_identifier;
        }
        //hex numbers
        if (LastChar == '0' && PeekChar() == 'x') //0x
        {
            NextChar(); //consume the 'x'
            NumStr.clear();
            while (isxdigit(NextChar())) //[0-9a-fA-F]*
                appendCh(NumStr, LastChar);
            if (!NumStr.length()) //check for error condition
                return ReportError("no hex digits after \"0x\" prefix");
            auto error = ConvertNumber(NumStr.c_str(), NumberVal, 16);
            if (error)
                return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on hexadecimal number", error));
            IsHexNumberVal = true;
            return tok_number;
        }
        if (isdigit(LastChar)) //[0-9]
        {
            NumStr = LastChar;
            while (isdigit(NextChar())) //[0-9]*
                NumStr += LastChar;
            auto error = ConvertNumber(NumStr.c_str(), NumberVal, 10);
            if (error)
                return ReportError(StringUtils::sprintf("ConvertNumber failed (%s) on decimal number", error));
            IsHexNumberVal = false;
            return tok_number;
        }
        //comments
        if (LastChar == '/' && PeekChar() == '/') //line comment
        {
            do
            {
                if (LastChar == '\n')
                    SignalNextLine();
                NextChar();
            } while (!(LastChar == EOF || LastChar == '\n'));
            return GetToken(); //interpret the next line
        }
        if (LastChar == '/' && PeekChar() == '*') //block comment
        {
            do
            {
                if (LastChar == '\n')
                    SignalNextLine();
                NextChar();
            } while (!(LastChar == EOF || LastChar == '*' && PeekChar() == '/'));
            if (LastChar == EOF) //unexpected end of file
            {
                LineIndex++;
                return ReportError("unexpected end of file in block comment");
            }
            NextChar();
            NextChar();
            return GetToken(); //get the next non-comment token
        }
        //operators
        auto opFound = OpTripleMap.find(MAKE_OP_TRIPLE(LastChar, PeekChar(), PeekChar(1)));
        if (opFound != OpTripleMap.end())
        {
            NextChar();
            NextChar();
            NextChar();
            return opFound->second;
        }
        opFound = OpDoubleMap.find(MAKE_OP_DOUBLE(LastChar, PeekChar()));
        if (opFound != OpDoubleMap.end())
        {
            NextChar();
            NextChar();
            return opFound->second;
        }
        opFound = OpSingleMap.find(MAKE_OP_SINGLE(LastChar));
        if (opFound != OpSingleMap.end())
        {
            NextChar();
            return opFound->second;
        }
        //end of file
        if (LastChar == EOF)
            return tok_eof;
        //unknown character
        return ReportError(StringUtils::sprintf("unexpected character \'%c\'", LastChar));
    }
    //Prepares the lexer for a new input file.
    //filename: path handed to FileHelper::ReadAllData together with Input.
    //returns: whatever FileHelper::ReadAllData reports (the lexer state is reset first, even when reading fails).
    bool ReadInputFile(const string & filename)
    {
        ResetLexerState();
        const bool loaded = FileHelper::ReadAllData(filename, Input);
        return loaded;
    }
    //Runs GetToken() over the current input until tok_eof or tok_error is returned.
    //lexEnum: callback receiving the textual dump piece by piece; only invoked when output is true.
    //output: when false the tokens are still consumed, but nothing is passed to lexEnum.
    //returns: true when the last token was not tok_error.
    bool TestLex(const function<void(const string & line)> & lexEnum, bool output = true)
    {
        size_t line = 0;
        if (output)
            lexEnum("1: ");
        int tok;
        string toks;
        clearReserve(toks);
        char newlineText[128] = "";
        do
        {
            tok = GetToken();
            if (!output)
                continue; //jumps to the loop condition, so lexing still stops at eof/error
            toks.clear();
            while (line < CurLine) //emit one "\n<number>: " prefix per line passed since the previous token
            {
                line++;
                //line is a size_t: printing it with "%d" was a format/argument mismatch, so cast and use "%llu"
                sprintf_s(newlineText, "\n%llu: ", static_cast<unsigned long long>(line + 1));
                toks.append(newlineText);
            }
            toks.append(TokString(tok));
            appendCh(toks, ' ');
            lexEnum(toks);
        } while (tok != tok_eof && tok != tok_error);
        //the loop can only exit on tok_eof or tok_error, so there is nothing left to validate here
        if (output)
        {
            for (const auto & warning : Warnings)
                lexEnum("\nwarning: " + warning);
        }
        return tok != tok_error;
    }
 };
 bool TestLexer(Lexer & lexer, const string & filename)
 {
    //Lexer regression test: lexes "tests\<filename>" and compares the collected output with "tests\exp_lex\<filename>".
    //NOTE(review): this span is a diff hunk; '-' lines carry the old text and '+' lines the new text of the same statement.
    //bail out when the test input cannot be read
    if (!lexer.ReadInputFile("tests\\" + filename))
    {
        printf("failed to read \"%s\"\n", filename.c_str());
        return false;
    }
-    string actual;
+    std::string actual;
-    Lexer::clearReserve(actual);
+    actual.reserve(65536);
    //collect every chunk reported by the lexer into actual
-    auto success = lexer.TestLex([&](const string & line)
+    auto success = lexer.Test([&](const std::string & line)
    {
        actual.append(line);
    });
-    string expected;
+    std::string expected;
    //success is only printed when the expected file could be read and equals the collected output
-    if (FileHelper::ReadAllText("tests\\exp_lex\\" + filename, expected))
+    if (FileHelper::ReadAllText("tests\\exp_lex\\" + filename, expected) && expected == actual)
    {
        if (expected == actual)
    {
        printf("lexer test for \"%s\" success!\n", filename.c_str());
        return true;
    }
    }
    //no matching expected output: the result of the lexer run itself decides (true is returned without printing)
    if (success)
        return true;
    printf("lexer test for \"%s\" failed...\n", filename.c_str());
@ -571,14 +31,14 @@ bool TestLexer(Lexer & lexer, const string & filename)
    return false;
 }
-bool DebugLexer(Lexer & lexer, const string & filename, bool output)
+bool DebugLexer(Lexer & lexer, const std::string & filename, bool output)
 {
    //Lexes "tests\<filename>" and prints every chunk the lexer reports with printf.
    //output is forwarded unchanged as the second argument of the lexer test call.
    //NOTE(review): this span is a diff hunk; '-' lines carry the old text and '+' lines the new text of the same statement.
    //bail out when the test input cannot be read
    if (!lexer.ReadInputFile("tests\\" + filename))
    {
        printf("failed to read \"%s\"\n", filename.c_str());
        return false;
    }
-    auto success = lexer.TestLex([](const string & line)
+    auto success = lexer.Test([](const std::string & line)
    {
        printf("%s", line.c_str());
    }, output);
@ -587,16 +47,16 @@ bool DebugLexer(Lexer & lexer, const string & filename, bool output)
    return success;
 }
-void GenerateExpected(Lexer & lexer, const string & filename)
+void GenerateExpected(Lexer & lexer, const std::string & filename)
 {
    if (!lexer.ReadInputFile("tests\\" + filename))
    {
        printf("failed to read \"%s\"\n", filename.c_str());
        return;
    }
-    string actual;
+    std::string actual;
-    Lexer::clearReserve(actual);
+    actual.reserve(65536);
-    lexer.TestLex([&](const string & line)
+    lexer.Test([&](const std::string & line)
    {
        actual.append(line);
    });