Implement a hacky preprocessor

2023-02-05 02:16:27 +01:00 · 2023-02-05 02:16:27 +01:00 · fe6952a1e8
parent d5034cf6d6
commit fe6952a1e8
10 changed files with 512 additions and 22 deletions
--- a/.gitignore
+++ b/.gitignore
@ -6,3 +6,4 @@ Debug/
 My Amplifier XE Results */
 actual.out
 expected.out
 .vs/
--- a/btparser/btparser.vcxproj
+++ b/btparser/btparser.vcxproj
@ -22,6 +22,7 @@
    <ClCompile Include="lexer.cpp" />
    <ClCompile Include="main.cpp" />
    <ClCompile Include="parser.cpp" />
    <ClCompile Include="preprocessor.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="ast.h" />
@ -29,6 +30,7 @@
    <ClInclude Include="keywords.h" />
    <ClInclude Include="lexer.h" />
    <ClInclude Include="operators.h" />
    <ClInclude Include="preprocessor.h" />
    <ClInclude Include="testfiles.h" />
  </ItemGroup>
  <ItemGroup>
@ -37,31 +39,32 @@
  <PropertyGroup Label="Globals">
    <ProjectGuid>{B0411C78-2F06-49E0-8DE9-5C52A466F5DE}</ProjectGuid>
    <RootNamespace>btparser</RootNamespace>
    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>MultiByte</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>MultiByte</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>MultiByte</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>MultiByte</CharacterSet>
  </PropertyGroup>
--- a/btparser/btparser.vcxproj.filters
+++ b/btparser/btparser.vcxproj.filters
@ -24,6 +24,9 @@
    <ClCompile Include="parser.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="preprocessor.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="testfiles.h">
@ -44,6 +47,9 @@
    <ClInclude Include="helpers.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="preprocessor.h">
      <Filter>Header Files</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <None Include="parser.h">
--- a/btparser/lexer.cpp
+++ b/btparser/lexer.cpp
@ -14,12 +14,6 @@ static void clearReserve(std::string & str, size_t reserve = DEFAULT_STRING_BUFF
    str.reserve(reserve);
 }
 static void appendCh(std::string & str, char ch)
 {
    str.resize(str.size() + 1);
    str[str.size() - 1] = ch;
 }
 static const char* convertNumber(const char* str, uint64_t & result, int radix)
 {
    errno = 0;
@ -64,6 +58,7 @@ bool Lexer::DoLexing(std::vector<TokenState> & tokens, std::string & error)
            return false;
        }
        tokens.push_back(mState);
        mState.Clear();
        if(token == tok_eof)
            break;
    }
@ -88,11 +83,11 @@ bool Lexer::Test(const std::function<void(const std::string & line)> & lexEnum,
        while(line < mState.CurLine)
        {
            line++;
-            sprintf_s(newlineText, "\n%d: ", line + 1);
+            sprintf_s(newlineText, "\n%zu: ", line + 1);
            toks.append(newlineText);
        }
        toks.append(TokString(tok));
-        appendCh(toks, ' ');
+        toks.push_back(' ');
        lexEnum(toks);
    }
    while(tok != tok_eof && tok != tok_error);
@ -250,7 +245,7 @@ Lexer::Token Lexer::getToken()
                else
                    return reportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", mLastChar));
            }
-            appendCh(mState.StringLit, mLastChar);
+            mState.StringLit.push_back(mLastChar);
        }
    }
@ -261,7 +256,7 @@ Lexer::Token Lexer::getToken()
        nextChar();
        while(isalnum(mLastChar) || mLastChar == '_') //[0-9a-zA-Z_]
        {
-            appendCh(mState.IdentifierStr, mLastChar);
+            mState.IdentifierStr.push_back(mLastChar);
            nextChar();
        }
@ -280,7 +275,7 @@ Lexer::Token Lexer::getToken()
        mNumStr.clear();
        while (isxdigit(nextChar())) //[0-9a-fA-F]*
-            appendCh(mNumStr, mLastChar);
+            mNumStr.push_back(mLastChar);
        if(!mNumStr.length()) //check for error condition
            return reportError("no hex digits after \"0x\" prefix");
@ -394,14 +389,11 @@ void Lexer::resetLexerState()
    mError.clear();
    mWarnings.clear();
    clearReserve(mState.IdentifierStr);
    mState.NumberVal = 0;
    mIsHexNumberVal = false;
    clearReserve(mState.StringLit);
    clearReserve(mNumStr, 16);
    mState.CharLit = '\0';
    mLastChar = ' ';
-    mState.CurLine = 0;
+    mState.Clear();
    mState.LineIndex = 0;
 }
 void Lexer::setupTokenMaps()
--- a/btparser/lexer.h
+++ b/btparser/lexer.h
@ -4,6 +4,7 @@
 #include <vector>
 #include <unordered_map>
 #include <functional>
 #include <string>
 class Lexer
 {
@ -50,6 +51,14 @@ public:
        {
            return Token >= tok_signed && Token <= tok_UINT32;
        }
        // Resets the per-token payload: identifier text, numeric value,
        // string literal and character literal. No other member is touched.
        void Clear()
        {
            NumberVal = 0;
            CharLit = '\0';
            StringLit.clear();
            IdentifierStr.clear();
        }
    };
    explicit Lexer();
--- a/btparser/main.cpp
+++ b/btparser/main.cpp
@ -4,6 +4,7 @@
 #include "lexer.h"
 #include "parser.h"
 #include "helpers.h"
 #include "preprocessor.h"
 bool TestLexer(Lexer & lexer, const std::string & filename)
 {
@ -87,9 +88,27 @@ void DebugLexerTests(bool output = true)
 bool DebugParser(const std::string & filename)
 {
    std::string data;
    if (!FileHelper::ReadAllText("tests\\" + filename, data))
    {
        printf("Failed to read: %s\n", filename.c_str());
        return false;
    }
    std::string pperror;
    std::unordered_map<std::string, std::string> definitions;
    definitions["WIN32"] = "";
    definitions["_MSC_VER"] = "1337";
    auto ppData = preprocess(data, pperror, definitions);
    if (!pperror.empty())
    {
        printf("Preprocess error: %s\n", pperror.c_str());
        return false;
    }
    Parser parser;
    std::string error;
-    if(!parser.ParseFile("tests\\" + filename, error))
+    if(!parser.ParseString(ppData, error))
    {
        printf("ParseFile failed: %s\n", error.c_str());
        return false;
--- a/btparser/parser.cpp
+++ b/btparser/parser.cpp
@ -21,6 +21,16 @@ bool Parser::ParseFile(const string & filename, string & error)
    return !!mBinaryTemplate;
 }
 bool Parser::ParseString(const std::string& source, std::string& error)
 {
    mLexer.SetInputData(source);
    if (!mLexer.DoLexing(mTokens, error))
        return false;
    CurToken = mTokens[0];
    mBinaryTemplate = ParseBinaryTemplate();
    return !!mBinaryTemplate;
 }
 void Parser::NextToken()
 {
    if(mIndex < mTokens.size() - 1)
--- a/btparser/parser.h
+++ b/btparser/parser.h
@ -16,6 +16,7 @@ public:
    explicit Parser();
    bool ParseFile(const std::string & filename, std::string & error);
    bool ParseString(const std::string& source, std::string& error);
 private:
    Lexer mLexer;
--- a/btparser/preprocessor.cpp
+++ b/btparser/preprocessor.cpp
@ -0,0 +1,443 @@
 #include "preprocessor.h"
 #include <vector>
 #include <stdexcept>
 #include <unordered_map>
 // One logical source line together with the metadata used in diagnostics.
 struct Line
 {
 	size_t number = 0;      // line number shown by str()
 	bool comment = false;   // when set, str() tags the line with " (comment)"
 	std::string text;       // the line's content
 	std::string eolcomment; // appended verbatim after `text` by str()
 	// Formats the line as "line <number>[ (comment)]: <text><eolcomment>".
 	std::string str() const
 	{
 		const char* tag = comment ? " (comment)" : "";
 		return "line " + std::to_string(number) + tag + ": " + text + eolcomment;
 	}
 	// Writes str() followed by a newline to stdout.
 	void print() const
 	{
 		const std::string formatted = str();
 		puts(formatted.c_str());
 	}
 };
 // Minimal character cursor over the text of a single Line.
 // All failures are reported by throwing Tokenizer::exception.
 struct Tokenizer
 {
 	// Thrown by error(); what() is "<message> === <line.str()>".
 	struct exception : public std::runtime_error
 	{
 		exception(const Line& line, const std::string& message = std::string())
 			: std::runtime_error(message + " === " + line.str())
 		{
 		}
 	};
 	const Line& line;    // held by reference: the Line must outlive the tokenizer
 	size_t position = 0; // index of the next unread character in line.text
 	Tokenizer(const Line& line)
 		: line(line) { }
 	// Returns the next character without consuming it, or EOF at the end of
 	// the text. The character is widened through unsigned char so that a
 	// 0xFF byte cannot be mistaken for EOF where plain char is signed.
 	int peek() const
 	{
 		if (position >= line.text.length())
 			return EOF;
 		return static_cast<unsigned char>(line.text[position]);
 	}
 	// Returns the next character and advances; throws at the end of the text.
 	char consume()
 	{
 		if (position >= line.text.length())
 			error("cannot consuum");
 		return line.text[position++];
 	}
 	// Skips spaces and tabs. With `required` set, throws when nothing was skipped.
 	void skip_spaces(bool required = false)
 	{
 		auto oldPosition = position;
 		while (true)
 		{
 			auto ch = peek();
 			if (ch == ' ' || ch == '\t')
 				consume();
 			else
 				break;
 		}
 		if (required && oldPosition == position)
 			error("whitespace was expected, none found");
 	}
 	// Throws an exception whose message is prefixed with "<line>:<column> ",
 	// where the column is the 1-based cursor position. Never returns.
 	[[noreturn]] void error(const std::string& message)
 	{
 		throw exception(line, std::to_string(line.number) + ":" + std::to_string(position + 1) + " " + message);
 	}
 	// Reads [A-Za-z_][A-Za-z0-9_]* at the cursor; throws when no identifier starts here.
 	std::string identifier()
 	{
 		std::string name;
 		while (true)
 		{
 			auto ch = peek();
 			if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')
 			{
 				name.push_back(consume());
 			}
 			else if (!name.empty() && (ch >= '0' && ch <= '9'))
 			{
 				// Digits are only accepted after the first character.
 				name.push_back(consume());
 			}
 			else
 			{
 				break;
 			}
 		}
 		if (name.empty())
 			error("expected identifier");
 		return name;
 	}
 	// Consumes and returns everything from the cursor to the end of the text.
 	std::string remainder()
 	{
 		std::string result;
 		while (true)
 		{
 			auto ch = peek();
 			if (ch == EOF)
 				break;
 			result.push_back(consume());
 		}
 		return result;
 	}
 	// Consumes and returns the characters up to (not including) `expected`,
 	// leaving the cursor on `expected`. Throws if the text ends first.
 	std::string until(char expected)
 	{
 		std::string result;
 		while (true)
 		{
 			auto ch = peek();
 			if (ch == EOF)
 				error("unexpected end of file");
 			// Compare in the same unsigned-char domain that peek() returns.
 			if (ch == static_cast<unsigned char>(expected))
 				break;
 			result.push_back(consume());
 		}
 		return result;
 	}
 };
 // Removes /* ... */ comments from `input`.
 // Each comment is replaced by a single space so neighbouring tokens do not
 // merge, and newlines inside a comment are kept so later line numbers stay
 // correct. Comment markers inside string/character literals and inside
 // `//` comments are left alone. An unterminated comment swallows the rest
 // of the input.
 std::string remove_block_comments(const std::string& input)
 {
 	enum class State { Code, String, Char, LineComment, BlockComment };
 	State state = State::Code;
 	std::string result;
 	result.reserve(input.length());
 	for (size_t i = 0; i < input.length(); i++)
 	{
 		const char ch = input[i];
 		const char next = i + 1 < input.length() ? input[i + 1] : '\0';
 		switch (state)
 		{
 		case State::Code:
 			if (ch == '/' && next == '*')
 			{
 				state = State::BlockComment;
 				result += ' ';
 				i++; // also skip the '*', so "/*/" does not close the comment
 				continue;
 			}
 			if (ch == '/' && next == '/')
 				state = State::LineComment;
 			else if (ch == '"')
 				state = State::String;
 			else if (ch == '\'')
 				state = State::Char;
 			result += ch;
 			break;
 		case State::String:
 		case State::Char:
 			result += ch;
 			if (ch == '\\' && i + 1 < input.length())
 				result += input[++i]; // escaped character, never a terminator
 			else if (ch == '\n' || ch == (state == State::String ? '"' : '\''))
 				state = State::Code; // a newline ends an unterminated literal
 			break;
 		case State::LineComment:
 			result += ch;
 			if (ch == '\n')
 				state = State::Code;
 			break;
 		case State::BlockComment:
 			if (ch == '*' && next == '/')
 			{
 				state = State::Code;
 				i++; // consume the '/' as well so no "*/" leaks into the output
 			}
 			else if (ch == '\n')
 			{
 				result += ch;
 			}
 			break;
 		}
 	}
 	return result;
 }
 // Returns the index of the "//" that starts a line comment in `line`, or
 // std::string::npos. A "//" inside a string or character literal is ignored.
 static size_t find_line_comment(const std::string& line)
 {
 	char quote = '\0'; // active literal delimiter, '\0' when outside a literal
 	for (size_t i = 0; i < line.length(); i++)
 	{
 		const char ch = line[i];
 		if (quote != '\0')
 		{
 			if (ch == '\\')
 				i++; // skip the escaped character
 			else if (ch == quote)
 				quote = '\0';
 		}
 		else if (ch == '"' || ch == '\'')
 		{
 			quote = ch;
 		}
 		else if (ch == '/' && i + 1 < line.length() && line[i + 1] == '/')
 		{
 			return i;
 		}
 	}
 	return std::string::npos;
 }
 // Strips `//` comments from every '\n'-separated line of `input`.
 // '\r' characters are dropped; newlines are preserved. Literal tracking is
 // restarted on every line.
 std::string remove_line_comments(const std::string& input)
 {
 	std::string result;
 	std::string line;
 	const auto flushLine = [&line, &result]()
 	{
 		const auto commentIdx = find_line_comment(line);
 		if (commentIdx != std::string::npos)
 			line.resize(commentIdx);
 		result += line;
 		line.clear();
 	};
 	for (const char ch : input)
 	{
 		if (ch == '\r')
 			continue;
 		if (ch == '\n')
 		{
 			flushLine();
 			result += '\n';
 		}
 		else
 		{
 			line.push_back(ch);
 		}
 	}
 	if (!line.empty())
 		flushLine();
 	return result;
 }
 // Splits `input` into logical lines with comments removed.
 // Block comments are stripped first, so directives inside them are not seen.
 // A line ending in '\' is joined with the next one: the backslash is replaced
 // by '\n' and the joined line keeps the number of its first physical line.
 // '\r' is ignored. Line numbers start at 1.
 std::vector<Line> split_lines(const std::string& input)
 {
 	const auto uncommented = remove_block_comments(input);
 	std::vector<Line> lines;
 	Line line;
 	size_t lineNumber = 1;
 	line.number = lineNumber;
 	for (const char ch : uncommented)
 	{
 		if (ch == '\r')
 			continue;
 		if (ch != '\n')
 		{
 			line.text.push_back(ch);
 			continue;
 		}
 		lineNumber++;
 		if (!line.text.empty() && line.text.back() == '\\')
 		{
 			// continuation
 			line.text.back() = '\n';
 		}
 		else
 		{
 			lines.push_back(line);
 			line.number = lineNumber;
 			line.text.clear();
 		}
 	}
 	// Flush a last line that is not newline-terminated.
 	if (!line.text.empty())
 		lines.push_back(line);
 	for (auto& entry : lines)
 		entry.text = remove_line_comments(entry.text);
 	return lines;
 }
 //Taken from: https://stackoverflow.com/a/24315631
 // Replaces every non-overlapping occurrence of `from` in `s` with `to`,
 // scanning left to right. Replacement text is never rescanned, so `to` may
 // contain `from`. An empty `from` is a no-op: it would otherwise match at
 // every position and the loop would never terminate.
 void ReplaceAll(std::string& s, const std::string& from, const std::string& to)
 {
 	if (from.empty())
 		return;
 	for (size_t pos = s.find(from); pos != std::string::npos; pos = s.find(from, pos + to.length()))
 		s.replace(pos, from.length(), to);
 }
 std::string preprocess(const std::string& input, std::string& error, const std::unordered_map<std::string, std::string>& definitions)
 {
 	auto lines = split_lines(input);
 	std::vector<Line> final;
 	struct Scope
 	{
 		size_t lineIndex = 0;
 		std::string condition;
 		bool value = false;
 	};
 	std::vector<Scope> stack;
 	auto state = definitions;
 	auto emitting = [&stack]()
 	{
 		for (const auto& s : stack)
 			if (!s.value)
 				return false;
 		return true;
 	};
 	for (size_t i = 0; i < lines.size(); i++)
 	{
 		const auto& line = lines[i];
 		Tokenizer t(lines[i]);
 		t.skip_spaces();
 		if (t.peek() == '#')
 		{
 			t.consume();
 			t.skip_spaces();
 			auto directive = t.identifier();
 			line.print();
 			if (directive == "ifndef")
 			{
 				t.skip_spaces(true);
 				auto identifier = t.identifier();
 				printf("#ifndef(%s)\n", identifier.c_str());
 				stack.push_back({ i, "!defined(" + identifier + ")", state.count(identifier) == 0 });
 				printf("emitting: %d\n", emitting());
 			}
 			else if (directive == "ifdef")
 			{
 				t.skip_spaces(true);
 				auto identifier = t.identifier();
 				printf("#ifdef(%s)\n", identifier.c_str());
 				stack.push_back({ i, identifier, state.count(identifier) != 0 });
 				printf("emitting: %d\n", emitting());
 			}
 			else if (directive == "else")
 			{
 				if (stack.empty())
 					throw std::runtime_error("no matching #if for #else");
 				if (!stack.back().value)
 				{
 					stack.back().value = true;
 				}
 				printf("#else (%s)\n", stack.back().condition.c_str());
 				printf("emitting: %d\n", emitting());
 			}
 			else if (directive == "endif")
 			{
 				if (stack.empty())
 					throw std::runtime_error("no matching #if for #endif");
 				printf("#endif (%s)\n", stack.back().condition.c_str());
 				stack.pop_back();
 				printf("emitting: %d\n", emitting());
 			}
 			else if (directive == "define")
 			{
 				t.skip_spaces(true);
 				auto identifier = t.identifier();
 				if (t.peek() == '(')
 				{
 					t.consume();
 					t.skip_spaces();
 					std::vector<std::string> parameters;
 					while (true)
 					{
 						auto ch = t.peek();
 						if (ch == ')')
 							break;
 						if (ch == EOF)
 							throw std::runtime_error("expected ')', got EOF instead");
 						auto argument = t.identifier();
 						parameters.push_back(argument);
 						t.skip_spaces();
 						ch = t.peek();
 						if (ch == ')')
 							break;
 						else if (ch == ',')
 						{
 							t.consume();
 							t.skip_spaces();
 						}
 						else
 							throw std::runtime_error("expect ',' or ')' got something else (too lazy sry)");
 					}
 					t.consume();
 					t.skip_spaces();
 					auto token = t.remainder();
 					std::string pretty;
 					for (size_t i = 0; i < parameters.size(); i++)
 					{
 						if (i > 0)
 							pretty += ", ";
 						pretty += parameters[i];
 					}
 					printf("#define %s('%s' = '%s')\n", identifier.c_str(), pretty.c_str(), token.c_str());
 				}
 				else
 				{
 					t.skip_spaces();
 					auto token = t.remainder();
 					if (token.empty())
 					{
 						printf("#define(%s)\n", identifier.c_str());
 					}
 					else
 					{
 						printf("#define('%s' = '%s')\n", identifier.c_str(), token.c_str());
 					}
 					if (emitting())
 					{
 						state[identifier] = token;
 					}
 				}
 			}
 			else if (directive == "include")
 			{
 				t.skip_spaces();
 				auto type = t.peek();
 				if (type == '\"')
 				{
 					t.consume();
 					auto file = t.until('\"');
 					printf("#include \"%s\"\n", file.c_str());
 				}
 				else if (type == '<')
 				{
 					t.consume();
 					auto file = t.until('>');
 					printf("#include <%s>\n", file.c_str());
 				}
 				else
 				{
 					throw std::runtime_error("invalid syntax for #include");
 				}
 			}
 			else
 			{
 				printf("directive: '%s'\n", directive.c_str());
 				throw std::runtime_error("unknown directive '" + directive + "'");
 			}
 		}
 		else if (emitting())
 		{
 			final.push_back(line);
 		}
 	}
 	std::string result;
 	for (const auto& line : final)
 	{
 		result += line.text;
 		result += '\n';
 	}
 	// TODO: strip out comments
 	// TODO: somehow prevent replacing inside strings IsInsideString(position)
 	// TODO: recursively replace
 	// HACK: not proper
 	for (const auto& itr : state)
 	{
 		ReplaceAll(result, itr.first, itr.second);
 	}
 	return result;
 }
--- a/btparser/preprocessor.h
+++ b/btparser/preprocessor.h
@ -0,0 +1,6 @@
 #pragma once
 #include <string>
 #include <unordered_map>
 // Runs the preprocessor over `input` and returns the resulting text.
 // `definitions` seeds the macro table (name -> replacement text).
 // `error` is an out-parameter for a failure description; the caller in
 // main.cpp treats a non-empty value as failure.
 std::string preprocess(const std::string& input, std::string& error, const std::unordered_map<std::string, std::string>& definitions);