From fe6952a1e8839bb2d97f8f349d97a0a369269d3c Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Sun, 5 Feb 2023 02:16:27 +0100 Subject: [PATCH] Implement a hacky preprocessor --- .gitignore | 1 + btparser/btparser.vcxproj | 11 +- btparser/btparser.vcxproj.filters | 6 + btparser/lexer.cpp | 24 +- btparser/lexer.h | 9 + btparser/main.cpp | 21 +- btparser/parser.cpp | 12 +- btparser/parser.h | 1 + btparser/preprocessor.cpp | 443 ++++++++++++++++++++++++++++++ btparser/preprocessor.h | 6 + 10 files changed, 512 insertions(+), 22 deletions(-) create mode 100644 btparser/preprocessor.cpp create mode 100644 btparser/preprocessor.h diff --git a/.gitignore b/.gitignore index ec8fcc7..7275071 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ Debug/ My Amplifier XE Results */ actual.out expected.out +.vs/ diff --git a/btparser/btparser.vcxproj b/btparser/btparser.vcxproj index 4811b58..4d9a011 100644 --- a/btparser/btparser.vcxproj +++ b/btparser/btparser.vcxproj @@ -22,6 +22,7 @@ + @@ -29,6 +30,7 @@ + @@ -37,31 +39,32 @@ {B0411C78-2F06-49E0-8DE9-5C52A466F5DE} btparser + 10.0 Application true - v120 + v143 MultiByte Application true - v120 + v143 MultiByte Application false - v120 + v143 true MultiByte Application false - v120 + v143 true MultiByte diff --git a/btparser/btparser.vcxproj.filters b/btparser/btparser.vcxproj.filters index a7b5f10..b902403 100644 --- a/btparser/btparser.vcxproj.filters +++ b/btparser/btparser.vcxproj.filters @@ -24,6 +24,9 @@ Source Files + + Source Files + @@ -44,6 +47,9 @@ Header Files + + Header Files + diff --git a/btparser/lexer.cpp b/btparser/lexer.cpp index 360420e..2f06d74 100644 --- a/btparser/lexer.cpp +++ b/btparser/lexer.cpp @@ -14,12 +14,6 @@ static void clearReserve(std::string & str, size_t reserve = DEFAULT_STRING_BUFF str.reserve(reserve); } -static void appendCh(std::string & str, char ch) -{ - str.resize(str.size() + 1); - str[str.size() - 1] = ch; -} - static const char* convertNumber(const char* str, uint64_t & 
result, int radix) { errno = 0; @@ -64,6 +58,7 @@ bool Lexer::DoLexing(std::vector & tokens, std::string & error) return false; } tokens.push_back(mState); + mState.Clear(); if(token == tok_eof) break; } @@ -88,11 +83,11 @@ bool Lexer::Test(const std::function & lexEnum, while(line < mState.CurLine) { line++; - sprintf_s(newlineText, "\n%d: ", line + 1); + sprintf_s(newlineText, "\n%zu: ", line + 1); toks.append(newlineText); } toks.append(TokString(tok)); - appendCh(toks, ' '); + toks.push_back(' '); lexEnum(toks); } while(tok != tok_eof && tok != tok_error); @@ -250,7 +245,7 @@ Lexer::Token Lexer::getToken() else return reportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", mLastChar)); } - appendCh(mState.StringLit, mLastChar); + mState.StringLit.push_back(mLastChar); } } @@ -261,7 +256,7 @@ Lexer::Token Lexer::getToken() nextChar(); while(isalnum(mLastChar) || mLastChar == '_') //[0-9a-zA-Z_] { - appendCh(mState.IdentifierStr, mLastChar); + mState.IdentifierStr.push_back(mLastChar); nextChar(); } @@ -279,8 +274,8 @@ Lexer::Token Lexer::getToken() nextChar(); //consume the 'x' mNumStr.clear(); - while(isxdigit(nextChar())) //[0-9a-fA-F]* - appendCh(mNumStr, mLastChar); + while (isxdigit(nextChar())) //[0-9a-fA-F]* + mNumStr.push_back(mLastChar); if(!mNumStr.length()) //check for error condition return reportError("no hex digits after \"0x\" prefix"); @@ -394,14 +389,11 @@ void Lexer::resetLexerState() mError.clear(); mWarnings.clear(); clearReserve(mState.IdentifierStr); - mState.NumberVal = 0; mIsHexNumberVal = false; clearReserve(mState.StringLit); clearReserve(mNumStr, 16); - mState.CharLit = '\0'; mLastChar = ' '; - mState.CurLine = 0; - mState.LineIndex = 0; + mState.Clear(); } void Lexer::setupTokenMaps() diff --git a/btparser/lexer.h b/btparser/lexer.h index 975622c..6ef4098 100644 --- a/btparser/lexer.h +++ b/btparser/lexer.h @@ -4,6 +4,7 @@ #include #include #include +#include class Lexer { @@ -50,6 +51,14 @@ public: { 
return Token >= tok_signed && Token <= tok_UINT32; } + + void Clear() + { + IdentifierStr.clear(); + NumberVal = 0; + StringLit.clear(); + CharLit = '\0'; + } }; explicit Lexer(); diff --git a/btparser/main.cpp b/btparser/main.cpp index 9891b81..091fc9e 100644 --- a/btparser/main.cpp +++ b/btparser/main.cpp @@ -4,6 +4,7 @@ #include "lexer.h" #include "parser.h" #include "helpers.h" +#include "preprocessor.h" bool TestLexer(Lexer & lexer, const std::string & filename) { @@ -87,9 +88,27 @@ void DebugLexerTests(bool output = true) bool DebugParser(const std::string & filename) { + std::string data; + if (!FileHelper::ReadAllText("tests\\" + filename, data)) + { + printf("Failed to read: %s\n", filename.c_str()); + return false; + } + + std::string pperror; + std::unordered_map definitions; + definitions["WIN32"] = ""; + definitions["_MSC_VER"] = "1337"; + auto ppData = preprocess(data, pperror, definitions); + if (!pperror.empty()) + { + printf("Preprocess error: %s\n", pperror.c_str()); + return false; + } + Parser parser; std::string error; - if(!parser.ParseFile("tests\\" + filename, error)) + if(!parser.ParseString(ppData, error)) { printf("ParseFile failed: %s\n", error.c_str()); return false; diff --git a/btparser/parser.cpp b/btparser/parser.cpp index 1068006..08b786a 100644 --- a/btparser/parser.cpp +++ b/btparser/parser.cpp @@ -14,7 +14,17 @@ bool Parser::ParseFile(const string & filename, string & error) error = "failed to read input file"; return false; } - if(!mLexer.DoLexing(mTokens, error)) + if (!mLexer.DoLexing(mTokens, error)) + return false; + CurToken = mTokens[0]; + mBinaryTemplate = ParseBinaryTemplate(); + return !!mBinaryTemplate; +} + +bool Parser::ParseString(const std::string& source, std::string& error) +{ + mLexer.SetInputData(source); + if (!mLexer.DoLexing(mTokens, error)) return false; CurToken = mTokens[0]; mBinaryTemplate = ParseBinaryTemplate(); diff --git a/btparser/parser.h b/btparser/parser.h index 288084c..8f07883 100644 --- 
// btparser/preprocessor.cpp -- a small, deliberately limited C-style preprocessor.
//
// Supported directives: #ifdef, #ifndef, #else, #endif, #define (object-like),
// #include (parsed, then dropped). Function-like macros are parsed for syntax
// but not expanded. Any other directive is reported as an error.
//
// Companion declarations carried by the same patch:
//   parser.h       : class Parser gains
//                    bool ParseString(const std::string& source, std::string& error);
//   preprocessor.h : #pragma once, plus the declaration of preprocess() below.

// Pull in the project declaration when it is reachable, so that a signature
// drift is caught at compile time; the file also builds standalone.
#if defined(__has_include)
#if __has_include("preprocessor.h")
#include "preprocessor.h"
#endif
#endif

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <exception>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

namespace
{
    using MacroMap = std::unordered_map<std::string, std::string>;

    // Locale-independent character classes (std::isalpha & co. depend on the C locale).
    bool is_digit(char ch)
    {
        return ch >= '0' && ch <= '9';
    }

    bool is_identifier_start(char ch)
    {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
    }

    bool is_identifier_char(char ch)
    {
        return is_identifier_start(ch) || is_digit(ch);
    }

    // Copies the string/character literal that starts at text[pos] (which must be
    // a quote character) to `out`, honoring backslash escapes. Returns the index
    // of the first character after the literal. An unterminated literal ends at
    // the end of its line so a stray quote cannot swallow the rest of the input.
    size_t copy_literal(const std::string& text, size_t pos, std::string& out)
    {
        const char quote = text[pos];
        out += text[pos++];
        while (pos < text.size())
        {
            const char ch = text[pos];
            if (ch == '\n' || ch == '\r')
                break; // unterminated literal
            out += ch;
            pos++;
            if (ch == '\\' && pos < text.size() && text[pos] != '\n' && text[pos] != '\r')
                out += text[pos++]; // escaped character, never terminates the literal
            else if (ch == quote)
                break;
        }
        return pos;
    }

    // Removes trailing spaces and tabs.
    std::string trim_right(std::string text)
    {
        while (!text.empty() && (text.back() == ' ' || text.back() == '\t'))
            text.pop_back();
        return text;
    }

    // Replaces every whole identifier in `text` that names a macro with the
    // macro's (recursively expanded) value. String/character literals and
    // numbers are copied verbatim. `active` holds the macros currently being
    // expanded; a macro never expands inside its own expansion, which makes
    // self-referential definitions terminate.
    std::string expand_macros(const std::string& text, const MacroMap& macros, std::vector<std::string>& active)
    {
        std::string out;
        out.reserve(text.size());
        size_t i = 0;
        while (i < text.size())
        {
            const char ch = text[i];
            if (ch == '"' || ch == '\'')
            {
                i = copy_literal(text, i, out);
            }
            else if (is_identifier_start(ch))
            {
                size_t end = i + 1;
                while (end < text.size() && is_identifier_char(text[end]))
                    end++;
                const std::string name = text.substr(i, end - i);
                const auto found = macros.find(name);
                const bool busy = std::find(active.begin(), active.end(), name) != active.end();
                if (found == macros.end() || busy)
                {
                    out += name;
                }
                else
                {
                    active.push_back(name);
                    out += expand_macros(found->second, macros, active);
                    active.pop_back();
                }
                i = end;
            }
            else if (is_digit(ch))
            {
                // Copy the whole number so the "x1F" of "0x1F" is not mistaken for an identifier.
                while (i < text.size() && (is_identifier_char(text[i]) || text[i] == '.'))
                    out += text[i++];
            }
            else
            {
                out += ch;
                i++;
            }
        }
        return out;
    }
}

// One logical source line. `text` may contain embedded '\n' characters when
// backslash continuations were folded into it by split_lines().
struct Line
{
    size_t number = 0; // 1-based number of the first physical line
    bool comment = false;
    std::string text;
    std::string eolcomment;

    // Human readable form, used in diagnostics.
    std::string str() const
    {
        std::string s;
        s += "line ";
        s += std::to_string(number);
        if (comment)
            s += " (comment)";
        s += ": ";
        s += text;
        s += eolcomment;
        return s;
    }

    void print() const
    {
        puts(str().c_str());
    }
};

// Minimal cursor over the text of a single Line. All parse failures are
// reported by throwing Tokenizer::exception.
struct Tokenizer
{
    struct exception : public std::runtime_error
    {
        exception(const Line& line, const std::string& message = std::string())
            : std::runtime_error(message + " === " + line.str())
        {
        }
    };

    const Line& line; // not owned; must outlive the tokenizer
    size_t position = 0;

    explicit Tokenizer(const Line& line)
        : line(line) { }

    // Returns the current character without consuming it, or EOF at the end of the line.
    int peek() const
    {
        if (position >= line.text.length())
            return EOF;
        // Go through unsigned char: a plain (signed) char 0xFF would compare equal to EOF.
        return static_cast<unsigned char>(line.text[position]);
    }

    // Returns the current character and advances; throws at the end of the line.
    char consume()
    {
        if (position >= line.text.length())
            error("cannot consuum");
        return line.text[position++];
    }

    // Skips spaces and tabs; throws when `required` is set and nothing was skipped.
    void skip_spaces(bool required = false)
    {
        const auto oldPosition = position;
        while (true)
        {
            const auto ch = peek();
            if (ch == ' ' || ch == '\t')
                consume();
            else
                break;
        }
        if (required && oldPosition == position)
            error("whitespace was expected, none found");
    }

    // Throws Tokenizer::exception prefixed with "<line>:<column>".
    [[noreturn]] void error(const std::string& message)
    {
        throw exception(line, std::to_string(line.number) + ":" + std::to_string(position + 1) + " " + message);
    }

    // Consumes [A-Za-z_][A-Za-z0-9_]*; throws when no identifier starts here.
    std::string identifier()
    {
        std::string name;
        while (true)
        {
            const auto ch = peek();
            if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')
            {
                name.push_back(consume());
            }
            else if (!name.empty() && (ch >= '0' && ch <= '9'))
            {
                name.push_back(consume());
            }
            else
            {
                break;
            }
        }
        if (name.empty())
            error("expected identifier");
        return name;
    }

    // Consumes and returns everything up to the end of the line.
    std::string remainder()
    {
        std::string result;
        while (peek() != EOF)
            result.push_back(consume());
        return result;
    }

    // Consumes and returns everything up to (not including) `expected`;
    // throws when the line ends first.
    std::string until(char expected)
    {
        std::string result;
        while (true)
        {
            const auto ch = peek();
            if (ch == EOF)
                error("unexpected end of file");
            if (ch == static_cast<unsigned char>(expected))
                break;
            result.push_back(consume());
        }
        return result;
    }
};

// Replaces every /* ... */ comment with a single space. Newlines inside a
// comment are kept so later line numbers stay correct. Comment markers inside
// string/character literals or inside a // comment are left alone. An
// unterminated comment extends to the end of the input.
std::string remove_block_comments(const std::string& input)
{
    std::string result;
    result.reserve(input.size());
    const size_t size = input.size();
    size_t i = 0;
    while (i < size)
    {
        const char ch = input[i];
        const bool slash = ch == '/' && i + 1 < size;
        if (ch == '"' || ch == '\'')
        {
            i = copy_literal(input, i, result);
        }
        else if (slash && input[i + 1] == '/')
        {
            // Keep the line comment verbatim (remove_line_comments strips it later),
            // but do not look for "/*" inside it.
            while (i < size && input[i] != '\n')
                result += input[i++];
        }
        else if (slash && input[i + 1] == '*')
        {
            result += ' ';
            i += 2; // skip the opener first so that "/*/" does not close the comment
            while (i < size && !(input[i] == '*' && i + 1 < size && input[i + 1] == '/'))
            {
                if (input[i] == '\n')
                    result += '\n';
                i++;
            }
            if (i < size)
                i += 2; // skip the closing "*/"
        }
        else
        {
            result += ch;
            i++;
        }
    }
    return result;
}

// Removes // comments (up to, not including, the newline) and all '\r'
// characters. "//" inside a string/character literal is not a comment.
std::string remove_line_comments(const std::string& input)
{
    std::string result;
    result.reserve(input.size());
    const size_t size = input.size();
    size_t i = 0;
    while (i < size)
    {
        const char ch = input[i];
        if (ch == '\r')
        {
            i++;
        }
        else if (ch == '"' || ch == '\'')
        {
            i = copy_literal(input, i, result);
        }
        else if (ch == '/' && i + 1 < size && input[i + 1] == '/')
        {
            while (i < size && input[i] != '\n')
                i++;
        }
        else
        {
            result += ch;
            i++;
        }
    }
    return result;
}

// Splits `input` into logical lines with all comments removed. A physical line
// ending in a backslash is joined with the next one; the backslash is replaced
// by '\n' inside Line::text. Line::number is the 1-based physical line number
// at which the logical line starts.
std::vector<Line> split_lines(const std::string& input)
{
    const auto input_uncommented = remove_block_comments(input);
    std::vector<Line> lines;
    Line line;

    size_t lineNumber = 1;
    line.number = lineNumber;
    for (const auto ch : input_uncommented)
    {
        if (ch == '\r')
            continue;

        if (ch == '\n')
        {
            lineNumber++;
            if (!line.text.empty() && line.text.back() == '\\')
            {
                // continuation
                line.text.back() = '\n';
            }
            else
            {
                lines.push_back(line);
                line.number = lineNumber;
                line.text.clear();
            }
        }
        else
        {
            line.text.push_back(ch);
        }
    }

    // Input that does not end in a newline still yields its last line.
    if (!line.text.empty())
        lines.push_back(line);

    for (auto& current : lines)
        current.text = remove_line_comments(current.text);

    return lines;
}

//Taken from: https://stackoverflow.com/a/24315631
// Replaces every occurrence of `from` in `s` with `to`. An empty `from` is a
// no-op (it would otherwise match at every position and never terminate).
void ReplaceAll(std::string& s, const std::string& from, const std::string& to)
{
    if (from.empty())
        return;
    size_t start_pos = 0;
    while ((start_pos = s.find(from, start_pos)) != std::string::npos)
    {
        s.replace(start_pos, from.length(), to);
        start_pos += to.length(); // Handles case where 'to' is a substring of 'from'
    }
}

namespace
{
    // One open #ifdef/#ifndef.
    struct Scope
    {
        size_t lineNumber = 0;  // where the conditional was opened
        std::string condition;  // printable condition, for diagnostics
        bool value = false;     // is the currently selected branch active?
        bool elseSeen = false;  // a conditional may have only one #else
    };

    // Does the actual work of preprocess(); reports failures by throwing.
    std::string run_preprocessor(const std::string& input, const MacroMap& definitions)
    {
        const auto lines = split_lines(input);
        std::vector<Scope> stack;
        MacroMap state = definitions;
        std::vector<std::string> active;
        std::string result;

        // Text is emitted only while every enclosing conditional is active.
        const auto emitting = [&stack]()
        {
            return std::all_of(stack.begin(), stack.end(), [](const Scope& s) { return s.value; });
        };
        const auto open_scope = [&stack](const Line& at, const std::string& condition, bool value)
        {
            Scope scope;
            scope.lineNumber = at.number;
            scope.condition = condition;
            scope.value = value;
            stack.push_back(scope);
        };

        for (const auto& line : lines)
        {
            Tokenizer t(line);
            t.skip_spaces();

            if (t.peek() != '#')
            {
                if (emitting())
                {
                    // Expand with the macros defined so far: a #define only
                    // affects the text that follows it.
                    result += expand_macros(line.text, state, active);
                    result += '\n';
                }
                continue;
            }

            t.consume();
            t.skip_spaces();
            const auto directive = t.identifier();

            if (directive == "ifndef")
            {
                t.skip_spaces(true);
                const auto identifier = t.identifier();
                open_scope(line, "!defined(" + identifier + ")", state.count(identifier) == 0);
            }
            else if (directive == "ifdef")
            {
                t.skip_spaces(true);
                const auto identifier = t.identifier();
                open_scope(line, identifier, state.count(identifier) != 0);
            }
            else if (directive == "else")
            {
                if (stack.empty())
                    t.error("no matching #if for #else");
                if (stack.back().elseSeen)
                    t.error("duplicate #else for (" + stack.back().condition + ")");
                // Select the other branch: on -> off as well as off -> on.
                stack.back().value = !stack.back().value;
                stack.back().elseSeen = true;
            }
            else if (directive == "endif")
            {
                if (stack.empty())
                    t.error("no matching #if for #endif");
                stack.pop_back();
            }
            else if (directive == "define")
            {
                t.skip_spaces(true);
                const auto identifier = t.identifier();
                if (t.peek() == '(')
                {
                    // Function-like macro: the parameter list is validated, but
                    // the macro is not recorded (expansion is not supported).
                    t.consume();
                    t.skip_spaces();
                    while (t.peek() != ')')
                    {
                        if (t.peek() == EOF)
                            t.error("expected ')', got EOF instead");
                        t.identifier();
                        t.skip_spaces();
                        if (t.peek() == ',')
                        {
                            t.consume();
                            t.skip_spaces();
                        }
                        else if (t.peek() != ')')
                        {
                            t.error("expect ',' or ')' got something else (too lazy sry)");
                        }
                    }
                    t.consume();
                }
                else
                {
                    t.skip_spaces();
                    const auto token = trim_right(t.remainder());
                    if (emitting())
                        state[identifier] = token;
                }
            }
            else if (directive == "include")
            {
                // Includes are not resolved; the directive is checked and dropped.
                t.skip_spaces();
                const auto type = t.peek();
                if (type == '\"')
                {
                    t.consume();
                    t.until('\"');
                }
                else if (type == '<')
                {
                    t.consume();
                    t.until('>');
                }
                else
                {
                    t.error("invalid syntax for #include");
                }
            }
            else
            {
                t.error("unknown directive '" + directive + "'");
            }
        }

        if (!stack.empty())
        {
            const auto& open = stack.back();
            throw std::runtime_error("unterminated conditional (" + open.condition + ") opened on line " + std::to_string(open.lineNumber));
        }

        return result;
    }
}

// Preprocesses `input` and returns the resulting text.
//
// input       : source text; CRLF and LF line endings are both accepted.
// error       : cleared on entry; receives a diagnostic when preprocessing fails.
// definitions : predefined object-like macros (name -> replacement text).
//
// Returns the preprocessed text, each emitted line terminated by '\n'. On
// failure the returned string is empty and `error` is non-empty. No exception
// derived from std::exception escapes this function.
std::string preprocess(const std::string& input, std::string& error, const std::unordered_map<std::string, std::string>& definitions)
{
    error.clear();
    try
    {
        return run_preprocessor(input, definitions);
    }
    catch (const std::exception& e)
    {
        error = e.what();
        return std::string();
    }
}