diff --git a/.gitignore b/.gitignore
index ec8fcc7..7275071 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ Debug/
My Amplifier XE Results */
actual.out
expected.out
+.vs/
diff --git a/btparser/btparser.vcxproj b/btparser/btparser.vcxproj
index 4811b58..4d9a011 100644
--- a/btparser/btparser.vcxproj
+++ b/btparser/btparser.vcxproj
@@ -22,6 +22,7 @@
+
@@ -29,6 +30,7 @@
+
@@ -37,31 +39,32 @@
{B0411C78-2F06-49E0-8DE9-5C52A466F5DE}
btparser
+ 10.0
Application
true
- v120
+ v143
MultiByte
Application
true
- v120
+ v143
MultiByte
Application
false
- v120
+ v143
true
MultiByte
Application
false
- v120
+ v143
true
MultiByte
diff --git a/btparser/btparser.vcxproj.filters b/btparser/btparser.vcxproj.filters
index a7b5f10..b902403 100644
--- a/btparser/btparser.vcxproj.filters
+++ b/btparser/btparser.vcxproj.filters
@@ -24,6 +24,9 @@
Source Files
+
+ Source Files
+
@@ -44,6 +47,9 @@
Header Files
+
+ Header Files
+
diff --git a/btparser/lexer.cpp b/btparser/lexer.cpp
index 360420e..2f06d74 100644
--- a/btparser/lexer.cpp
+++ b/btparser/lexer.cpp
@@ -14,12 +14,6 @@ static void clearReserve(std::string & str, size_t reserve = DEFAULT_STRING_BUFF
str.reserve(reserve);
}
-static void appendCh(std::string & str, char ch)
-{
- str.resize(str.size() + 1);
- str[str.size() - 1] = ch;
-}
-
static const char* convertNumber(const char* str, uint64_t & result, int radix)
{
errno = 0;
@@ -64,6 +58,7 @@ bool Lexer::DoLexing(std::vector & tokens, std::string & error)
return false;
}
tokens.push_back(mState);
+ mState.Clear();
if(token == tok_eof)
break;
}
@@ -88,11 +83,11 @@ bool Lexer::Test(const std::function & lexEnum,
while(line < mState.CurLine)
{
line++;
- sprintf_s(newlineText, "\n%d: ", line + 1);
+ sprintf_s(newlineText, "\n%zu: ", line + 1);
toks.append(newlineText);
}
toks.append(TokString(tok));
- appendCh(toks, ' ');
+ toks.push_back(' ');
lexEnum(toks);
}
while(tok != tok_eof && tok != tok_error);
@@ -250,7 +245,7 @@ Lexer::Token Lexer::getToken()
else
return reportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", mLastChar));
}
- appendCh(mState.StringLit, mLastChar);
+ mState.StringLit.push_back(mLastChar);
}
}
@@ -261,7 +256,7 @@ Lexer::Token Lexer::getToken()
nextChar();
while(isalnum(mLastChar) || mLastChar == '_') //[0-9a-zA-Z_]
{
- appendCh(mState.IdentifierStr, mLastChar);
+ mState.IdentifierStr.push_back(mLastChar);
nextChar();
}
@@ -279,8 +274,8 @@ Lexer::Token Lexer::getToken()
nextChar(); //consume the 'x'
mNumStr.clear();
- while(isxdigit(nextChar())) //[0-9a-fA-F]*
- appendCh(mNumStr, mLastChar);
+ while (isxdigit(nextChar())) //[0-9a-fA-F]*
+ mNumStr.push_back(mLastChar);
if(!mNumStr.length()) //check for error condition
return reportError("no hex digits after \"0x\" prefix");
@@ -394,14 +389,11 @@ void Lexer::resetLexerState()
mError.clear();
mWarnings.clear();
clearReserve(mState.IdentifierStr);
- mState.NumberVal = 0;
mIsHexNumberVal = false;
clearReserve(mState.StringLit);
clearReserve(mNumStr, 16);
- mState.CharLit = '\0';
mLastChar = ' ';
- mState.CurLine = 0;
- mState.LineIndex = 0;
+ mState.Clear();
}
void Lexer::setupTokenMaps()
diff --git a/btparser/lexer.h b/btparser/lexer.h
index 975622c..6ef4098 100644
--- a/btparser/lexer.h
+++ b/btparser/lexer.h
@@ -4,6 +4,7 @@
#include
#include
#include
+#include
class Lexer
{
@@ -50,6 +51,14 @@ public:
{
return Token >= tok_signed && Token <= tok_UINT32;
}
+
+        // Resets the per-token payload: identifier text, numeric value, string
+        // literal and character literal. No other member is modified, so the
+        // position fields accessed elsewhere as mState.CurLine/mState.LineIndex
+        // keep their values across calls.
+        // NOTE(review): resetLexerState() used to zero CurLine and LineIndex
+        // explicitly and now only calls Clear() - confirm they are still reset.
+        void Clear()
+        {
+            IdentifierStr.clear();
+            StringLit.clear();
+            NumberVal = 0;
+            CharLit = '\0';
+        }
};
explicit Lexer();
diff --git a/btparser/main.cpp b/btparser/main.cpp
index 9891b81..091fc9e 100644
--- a/btparser/main.cpp
+++ b/btparser/main.cpp
@@ -4,6 +4,7 @@
#include "lexer.h"
#include "parser.h"
#include "helpers.h"
+#include "preprocessor.h"
bool TestLexer(Lexer & lexer, const std::string & filename)
{
@@ -87,9 +88,27 @@ void DebugLexerTests(bool output = true)
bool DebugParser(const std::string & filename)
{
+ std::string data;
+ if (!FileHelper::ReadAllText("tests\\" + filename, data))
+ {
+ printf("Failed to read: %s\n", filename.c_str());
+ return false;
+ }
+
+ std::string pperror;
+ std::unordered_map definitions;
+ definitions["WIN32"] = "";
+ definitions["_MSC_VER"] = "1337";
+ auto ppData = preprocess(data, pperror, definitions);
+ if (!pperror.empty())
+ {
+ printf("Preprocess error: %s\n", pperror.c_str());
+ return false;
+ }
+
Parser parser;
std::string error;
- if(!parser.ParseFile("tests\\" + filename, error))
+ if(!parser.ParseString(ppData, error))
{
printf("ParseFile failed: %s\n", error.c_str());
return false;
diff --git a/btparser/parser.cpp b/btparser/parser.cpp
index 1068006..08b786a 100644
--- a/btparser/parser.cpp
+++ b/btparser/parser.cpp
@@ -14,7 +14,17 @@ bool Parser::ParseFile(const string & filename, string & error)
error = "failed to read input file";
return false;
}
- if(!mLexer.DoLexing(mTokens, error))
+ if (!mLexer.DoLexing(mTokens, error))
+ return false;
+ CurToken = mTokens[0];
+ mBinaryTemplate = ParseBinaryTemplate();
+ return !!mBinaryTemplate;
+}
+
+bool Parser::ParseString(const std::string& source, std::string& error)
+{
+ mLexer.SetInputData(source);
+ if (!mLexer.DoLexing(mTokens, error))
return false;
CurToken = mTokens[0];
mBinaryTemplate = ParseBinaryTemplate();
diff --git a/btparser/parser.h b/btparser/parser.h
index 288084c..8f07883 100644
--- a/btparser/parser.h
+++ b/btparser/parser.h
@@ -16,6 +16,7 @@ public:
explicit Parser();
bool ParseFile(const std::string & filename, std::string & error);
+ // Parses template source that is already in memory: the definition hands `source` to the
+ // lexer via SetInputData and returns false when lexing fails, passing `error` to the lexer.
+ bool ParseString(const std::string& source, std::string& error);
private:
Lexer mLexer;
diff --git a/btparser/preprocessor.cpp b/btparser/preprocessor.cpp
new file mode 100644
index 0000000..bbfc99d
--- /dev/null
+++ b/btparser/preprocessor.cpp
@@ -0,0 +1,443 @@
+#include "preprocessor.h"
+
+#include <cstdio>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+// One logical input line together with the metadata used in diagnostics.
+struct Line
+{
+    size_t number = 0;      // line number shown in diagnostics
+    bool comment = false;   // adds a " (comment)" tag to str() when set
+    std::string text;       // line contents
+    std::string eolcomment; // appended verbatim after the text by str()
+
+    // Renders the line as "line <number>[ (comment)]: <text><eolcomment>".
+    std::string str() const
+    {
+        std::string out = "line " + std::to_string(number);
+        if (comment)
+            out.append(" (comment)");
+        out.append(": ").append(text).append(eolcomment);
+        return out;
+    }
+
+    // Writes str() followed by a newline to stdout.
+    void print() const
+    {
+        const std::string rendered = str();
+        puts(rendered.c_str());
+    }
+};
+
+// Cursor over the text of a single Line with small helpers for parsing
+// preprocessor directives. Every parse failure is reported by throwing
+// Tokenizer::exception (a std::runtime_error).
+struct Tokenizer
+{
+    // Error type thrown by error(); what() is "<message> === <line.str()>".
+    struct exception : public std::runtime_error
+    {
+        exception(const Line& line, const std::string& message = std::string())
+            : std::runtime_error(message + " === " + line.str())
+        {
+        }
+    };
+
+    const Line& line;    // borrowed: the Line must outlive the Tokenizer
+    size_t position = 0; // index of the next unread character in line.text
+
+    Tokenizer(const Line& line)
+        : line(line) { }
+
+    // Returns the next character without consuming it, or EOF at the end of the line.
+    int peek() const
+    {
+        if (position >= line.text.length())
+            return EOF;
+        // Widen through unsigned char: where char is signed, byte 0xFF would
+        // otherwise become -1 and be mistaken for EOF.
+        return static_cast<unsigned char>(line.text[position]);
+    }
+
+    // Returns the next character and advances; throws at the end of the line.
+    char consume()
+    {
+        if (position >= line.text.length())
+            error("cannot consume past the end of the line");
+        return line.text[position++];
+    }
+
+    // Skips spaces and tabs. With required == true, throws when nothing was skipped.
+    void skip_spaces(bool required = false)
+    {
+        const auto oldPosition = position;
+        while (true)
+        {
+            const auto ch = peek();
+            if (ch != ' ' && ch != '\t')
+                break;
+            consume();
+        }
+        if (required && oldPosition == position)
+            error("whitespace was expected, none found");
+    }
+
+    // Throws Tokenizer::exception with "<line number>:<1-based column> <message>".
+    [[noreturn]] void error(const std::string& message)
+    {
+        throw exception(line, std::to_string(line.number) + ":" + std::to_string(position + 1) + " " + message);
+    }
+
+    // Consumes and returns an identifier matching [A-Za-z_][A-Za-z0-9_]*;
+    // throws when the cursor is not at one.
+    std::string identifier()
+    {
+        std::string name;
+        while (true)
+        {
+            const auto ch = peek();
+            const bool letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
+            const bool digit = ch >= '0' && ch <= '9';
+            // digits are accepted everywhere except as the first character
+            if (!letter && !(digit && !name.empty()))
+                break;
+            name.push_back(consume());
+        }
+        if (name.empty())
+            error("expected identifier");
+        return name;
+    }
+
+    // Consumes and returns everything up to the end of the line (possibly empty).
+    std::string remainder()
+    {
+        if (position >= line.text.length())
+            return std::string();
+        auto result = line.text.substr(position);
+        position = line.text.length();
+        return result;
+    }
+
+    // Consumes and returns the text before the next `expected` character, which
+    // itself is left unconsumed; throws when the line ends first.
+    std::string until(char expected)
+    {
+        // compare in the same unsigned domain that peek() returns
+        const int stop = static_cast<unsigned char>(expected);
+        std::string result;
+        while (true)
+        {
+            const auto ch = peek();
+            if (ch == EOF)
+                error("unexpected end of file");
+            if (ch == stop)
+                break;
+            result.push_back(consume());
+        }
+        return result;
+    }
+};
+
+// Replaces every /* ... */ comment in `input` with a single space, keeping the
+// newlines that were inside the comment so later line numbers do not shift.
+// String/character literals and // comments are copied through unchanged, so a
+// "/*" inside them (for example a "//*****" banner) does not open a comment.
+// An unterminated comment swallows the rest of the input.
+std::string remove_block_comments(const std::string& input)
+{
+    enum class State { Code, Literal, LineComment, BlockComment };
+
+    const size_t length = input.length();
+    std::string result;
+    result.reserve(length);
+
+    State state = State::Code;
+    char quote = '\0'; // delimiter of the literal being copied
+    for (size_t i = 0; i < length; i++)
+    {
+        const char ch = input[i];
+        const char next = i + 1 < length ? input[i + 1] : '\0';
+        switch (state)
+        {
+        case State::Code:
+            if (ch == '/' && next == '*')
+            {
+                state = State::BlockComment;
+                result += ' '; // keeps the tokens on either side separated
+                i++;           // consume '*' as well so "/*/" is not open + close
+                break;
+            }
+            if (ch == '/' && next == '/')
+            {
+                state = State::LineComment;
+            }
+            else if (ch == '"' || ch == '\'')
+            {
+                state = State::Literal;
+                quote = ch;
+            }
+            result += ch;
+            break;
+
+        case State::Literal:
+            result += ch;
+            if (ch == '\\' && next != '\0' && next != '\n')
+            {
+                // escaped character: copy it without inspecting it
+                result += next;
+                i++;
+            }
+            else if (ch == quote || ch == '\n')
+            {
+                // a literal never continues past the end of its line
+                state = State::Code;
+            }
+            break;
+
+        case State::LineComment:
+            result += ch;
+            if (ch == '\n')
+                state = State::Code;
+            break;
+
+        case State::BlockComment:
+            if (ch == '*' && next == '/')
+            {
+                state = State::Code;
+                i++; // consume '/' as well so "*/" is not emitted
+            }
+            else if (ch == '\n')
+            {
+                result += '\n';
+            }
+            break;
+        }
+    }
+    return result;
+}
+
+// Removes // comments (from the "//" up to, not including, the end of the
+// line) and every '\r' from `input`. A "//" inside a string or character
+// literal is left alone, so text such as "http://host" survives.
+std::string remove_line_comments(const std::string& input)
+{
+    std::string result;
+    result.reserve(input.length());
+
+    char quote = '\0';      // delimiter of the open literal, '\0' when outside one
+    bool escaped = false;   // previous literal character was a backslash
+    bool inComment = false; // dropping characters until the next newline
+
+    for (size_t i = 0; i < input.length(); i++)
+    {
+        const char ch = input[i];
+        if (ch == '\r')
+            continue;
+
+        if (ch == '\n')
+        {
+            // neither a literal nor a comment continues past the end of a line
+            quote = '\0';
+            escaped = false;
+            inComment = false;
+            result += '\n';
+            continue;
+        }
+
+        if (inComment)
+            continue;
+
+        if (quote != '\0')
+        {
+            if (escaped)
+                escaped = false;
+            else if (ch == '\\')
+                escaped = true;
+            else if (ch == quote)
+                quote = '\0';
+        }
+        else if (ch == '"' || ch == '\'')
+        {
+            quote = ch;
+        }
+        else if (ch == '/' && i + 1 < input.length() && input[i + 1] == '/')
+        {
+            inComment = true;
+            continue;
+        }
+        result += ch;
+    }
+    return result;
+}
+
+// Splits `input` into logical lines with comments removed.
+//  - block comments are stripped first (see remove_block_comments)
+//  - '\r' is ignored; '\n' ends a line
+//  - a line ending in '\' is joined with the following one, the backslash
+//    being replaced by an embedded '\n'; the joined line keeps the number of
+//    its first physical line
+//  - // comments are stripped from every resulting line
+// A trailing line without '\n' is kept only when it is not empty.
+std::vector<Line> split_lines(const std::string& input)
+{
+    const auto uncommented = remove_block_comments(input);
+    std::vector<Line> lines;
+    Line current;
+
+    size_t lineNumber = 1;
+    current.number = lineNumber;
+    for (const char ch : uncommented)
+    {
+        if (ch == '\r')
+            continue;
+
+        if (ch != '\n')
+        {
+            current.text.push_back(ch);
+            continue;
+        }
+
+        lineNumber++;
+        if (!current.text.empty() && current.text.back() == '\\')
+        {
+            // continuation: keep accumulating into the same logical line
+            current.text.back() = '\n';
+        }
+        else
+        {
+            lines.push_back(current);
+            current.number = lineNumber;
+            current.text.clear();
+        }
+    }
+
+    if (!current.text.empty())
+        lines.push_back(current);
+
+    for (auto& entry : lines)
+        entry.text = remove_line_comments(entry.text);
+
+    return lines;
+}
+
+//Taken from: https://stackoverflow.com/a/24315631
+// Replaces every non-overlapping occurrence of `from` in `s` with `to`, in
+// place, scanning left to right. Inserted text is never rescanned. An empty
+// `from` leaves `s` unchanged.
+void ReplaceAll(std::string& s, const std::string& from, const std::string& to)
+{
+    // An empty pattern matches at every position, so the loop below would
+    // keep inserting `to` and never terminate.
+    if (from.empty())
+        return;
+
+    size_t start_pos = 0;
+    while ((start_pos = s.find(from, start_pos)) != std::string::npos)
+    {
+        s.replace(start_pos, from.length(), to);
+        start_pos += to.length(); // skip the inserted text: handles 'from' being a substring of 'to'
+    }
+}
+
+// Replaces every whole identifier in `text` that is a key of `macros` with the
+// mapped value. String and character literals are copied verbatim, and letters
+// that are part of a number (such as the "x1F" in 0x1F) are not treated as
+// identifiers. The replacement text is not rescanned.
+// TODO: recursively replace
+static std::string substitute_macros(const std::string& text, const std::unordered_map<std::string, std::string>& macros)
+{
+    auto isIdentStart = [](char ch)
+    {
+        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
+    };
+    auto isDigit = [](char ch)
+    {
+        return ch >= '0' && ch <= '9';
+    };
+
+    std::string result;
+    result.reserve(text.length());
+    size_t i = 0;
+    while (i < text.length())
+    {
+        const char ch = text[i];
+        if (ch == '"' || ch == '\'')
+        {
+            // copy the literal through its closing quote (or the end of the line)
+            const char quote = ch;
+            result += text[i++];
+            while (i < text.length())
+            {
+                const char cur = text[i++];
+                result += cur;
+                if (cur == '\\' && i < text.length())
+                    result += text[i++];
+                else if (cur == quote || cur == '\n')
+                    break;
+            }
+        }
+        else if (isIdentStart(ch))
+        {
+            const size_t start = i;
+            while (i < text.length() && (isIdentStart(text[i]) || isDigit(text[i])))
+                i++;
+            const auto name = text.substr(start, i - start);
+            const auto found = macros.find(name);
+            result += found != macros.end() ? found->second : name;
+        }
+        else if (isDigit(ch))
+        {
+            // copy the number together with any letters glued to it
+            while (i < text.length() && (isIdentStart(text[i]) || isDigit(text[i])))
+                result += text[i++];
+        }
+        else
+        {
+            result += text[i++];
+        }
+    }
+    return result;
+}
+
+// Runs a minimal C-style preprocessor over `input` and returns the resulting text.
+//
+// Handled directives: #ifdef, #ifndef, #else, #endif and object-like #define.
+// Function-like #define and #include are parsed and logged but have no other
+// effect. Any other directive is an error. Directive lines are dropped from
+// the output; other lines are kept only while every enclosing conditional is
+// active. Finally whole identifiers that name a macro are replaced by its value.
+//
+// input:       source text to process
+// error:       cleared on entry; on failure receives the error description,
+//              and an empty string is returned
+// definitions: macros (name -> replacement text) defined before the first line
+//
+// Progress information for every directive is printed to stdout.
+std::string preprocess(const std::string& input, std::string& error, const std::unordered_map<std::string, std::string>& definitions)
+{
+    error.clear();
+    try
+    {
+        auto lines = split_lines(input);
+        std::vector<Line> emitted;
+        struct Scope
+        {
+            size_t lineIndex = 0;  // index into `lines` of the opening directive
+            std::string condition; // textual form, for diagnostics only
+            bool value = false;    // whether the current branch is active
+            bool hasElse = false;  // an #else was already seen for this scope
+        };
+        std::vector<Scope> stack;
+        auto state = definitions;
+        // Text is emitted only when every enclosing conditional is active
+        auto emitting = [&stack]()
+        {
+            for (const auto& s : stack)
+                if (!s.value)
+                    return false;
+            return true;
+        };
+        for (size_t i = 0; i < lines.size(); i++)
+        {
+            const auto& line = lines[i];
+            Tokenizer t(lines[i]);
+            t.skip_spaces();
+
+            if (t.peek() != '#')
+            {
+                if (emitting())
+                    emitted.push_back(line);
+                continue;
+            }
+
+            t.consume();
+            t.skip_spaces();
+
+            auto directive = t.identifier();
+            line.print();
+
+            if (directive == "ifndef")
+            {
+                t.skip_spaces(true);
+                auto identifier = t.identifier();
+                printf("#ifndef(%s)\n", identifier.c_str());
+                stack.push_back({ i, "!defined(" + identifier + ")", state.count(identifier) == 0 });
+                printf("emitting: %d\n", emitting());
+            }
+            else if (directive == "ifdef")
+            {
+                t.skip_spaces(true);
+                auto identifier = t.identifier();
+                printf("#ifdef(%s)\n", identifier.c_str());
+                stack.push_back({ i, identifier, state.count(identifier) != 0 });
+                printf("emitting: %d\n", emitting());
+            }
+            else if (directive == "else")
+            {
+                if (stack.empty())
+                    throw std::runtime_error("no matching #if for #else");
+                auto& scope = stack.back();
+                if (scope.hasElse)
+                    throw std::runtime_error("duplicate #else for: " + lines[scope.lineIndex].str());
+                scope.hasElse = true;
+                // #else selects the opposite branch: an active branch must be
+                // switched off just as an inactive one is switched on
+                scope.value = !scope.value;
+                printf("#else (%s)\n", scope.condition.c_str());
+                printf("emitting: %d\n", emitting());
+            }
+            else if (directive == "endif")
+            {
+                if (stack.empty())
+                    throw std::runtime_error("no matching #if for #endif");
+                printf("#endif (%s)\n", stack.back().condition.c_str());
+                stack.pop_back();
+                printf("emitting: %d\n", emitting());
+            }
+            else if (directive == "define")
+            {
+                t.skip_spaces(true);
+                auto identifier = t.identifier();
+                if (t.peek() == '(')
+                {
+                    // Function-like macro: parsed and logged only, it is not
+                    // recorded in `state`
+                    t.consume();
+                    t.skip_spaces();
+                    std::vector<std::string> parameters;
+                    while (true)
+                    {
+                        auto ch = t.peek();
+                        if (ch == ')')
+                            break;
+                        if (ch == EOF)
+                            throw std::runtime_error("expected ')', got EOF instead");
+
+                        auto argument = t.identifier();
+                        parameters.push_back(argument);
+                        t.skip_spaces();
+                        ch = t.peek();
+                        if (ch == ')')
+                            break;
+                        else if (ch == ',')
+                        {
+                            t.consume();
+                            t.skip_spaces();
+                        }
+                        else
+                            throw std::runtime_error("expect ',' or ')' got something else (too lazy sry)");
+                    }
+                    t.consume(); // the closing ')'
+                    t.skip_spaces();
+                    auto token = t.remainder();
+
+                    std::string pretty;
+                    for (size_t p = 0; p < parameters.size(); p++)
+                    {
+                        if (p > 0)
+                            pretty += ", ";
+                        pretty += parameters[p];
+                    }
+
+                    printf("#define %s('%s' = '%s')\n", identifier.c_str(), pretty.c_str(), token.c_str());
+                }
+                else
+                {
+                    t.skip_spaces();
+                    auto token = t.remainder();
+                    if (token.empty())
+                    {
+                        printf("#define(%s)\n", identifier.c_str());
+                    }
+                    else
+                    {
+                        printf("#define('%s' = '%s')\n", identifier.c_str(), token.c_str());
+                    }
+                    // definitions inside an inactive branch must not take effect
+                    if (emitting())
+                    {
+                        state[identifier] = token;
+                    }
+                }
+            }
+            else if (directive == "include")
+            {
+                // Only the syntax is checked; the named file is not read
+                t.skip_spaces();
+                auto type = t.peek();
+                if (type == '\"')
+                {
+                    t.consume();
+                    auto file = t.until('\"');
+                    printf("#include \"%s\"\n", file.c_str());
+                }
+                else if (type == '<')
+                {
+                    t.consume();
+                    auto file = t.until('>');
+                    printf("#include <%s>\n", file.c_str());
+                }
+                else
+                {
+                    throw std::runtime_error("invalid syntax for #include");
+                }
+            }
+            else
+            {
+                printf("directive: '%s'\n", directive.c_str());
+                throw std::runtime_error("unknown directive '" + directive + "'");
+            }
+        }
+
+        // Every conditional has to be closed before the input ends
+        if (!stack.empty())
+            throw std::runtime_error("no matching #endif for: " + lines[stack.back().lineIndex].str());
+
+        std::string result;
+        for (const auto& line : emitted)
+        {
+            result += line.text;
+            result += '\n';
+        }
+
+        // Token-wise substitution: unlike a plain substring replace it cannot
+        // rewrite part of a longer identifier or the inside of a literal, and
+        // the result does not depend on the iteration order of `state`.
+        return substitute_macros(result, state);
+    }
+    catch (const std::exception& e)
+    {
+        // Tokenizer::exception derives from std::runtime_error, so this
+        // reports both syntax errors and the errors raised above
+        error = e.what();
+        return std::string();
+    }
+}
diff --git a/btparser/preprocessor.h b/btparser/preprocessor.h
new file mode 100644
index 0000000..0d17f94
--- /dev/null
+++ b/btparser/preprocessor.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <string>
+#include <unordered_map>
+
+// Preprocesses `input` and returns the resulting text.
+// `definitions` maps macro names to their replacement text and is the set of
+// macros defined before the first line is processed. `error` is an
+// out-parameter for a failure description; callers treat a non-empty value
+// as failure.
+std::string preprocess(const std::string& input, std::string& error, const std::unordered_map<std::string, std::string>& definitions);
\ No newline at end of file