everything lexes without errors (yay!)

This commit is contained in:
mrexodia 2016-06-05 03:20:55 +02:00
parent 527c35b671
commit d63737fd2d
No known key found for this signature in database
GPG Key ID: D72F9A4FAA0073B4
2 changed files with 54 additions and 14 deletions

View File

@ -4,6 +4,8 @@
#include <stdint.h> #include <stdint.h>
#include <unordered_map> #include <unordered_map>
#include <functional> #include <functional>
#include <vector>
#include <tuple>
#include "filehelper.h" #include "filehelper.h"
#include "stringutils.h" #include "stringutils.h"
#include "testfiles.h" #include "testfiles.h"
@ -34,10 +36,11 @@ struct Lexer
tok_stringlit //"([^\\"\r\n]|\\[\\"abfnrtv])*" tok_stringlit //"([^\\"\r\n]|\\[\\"abfnrtv])*"
}; };
string Input; vector<uint8_t> Input;
string ConsumedInput; string ConsumedInput;
size_t Index = 0; size_t Index = 0;
string Error; string Error;
vector<String> Warnings;
//lexer state //lexer state
string IdentifierStr; string IdentifierStr;
@ -77,6 +80,11 @@ struct Lexer
return tok_error; return tok_error;
} }
// Records a non-fatal lexer warning by appending it to the Warnings list.
// Unlike ReportError, nothing is returned, so the caller simply continues lexing.
void ReportWarning(const String & warning)
{
Warnings.push_back(warning);
}
String TokString(int tok) String TokString(int tok)
{ {
switch (Token(tok)) switch (Token(tok))
@ -104,17 +112,29 @@ struct Lexer
// PeekChar, shown as a side-by-side diff: rows holding two statements carry the
// pre-change text first and the post-change text second; single rows were added.
// Post-change behaviour: returns EOF when Index + distance is at or beyond
// Input.size(); otherwise returns the byte at Index + distance. Index is not modified.
// A '\0' byte is never returned: a warning is recorded and the byte one position
// further is examined instead.
// NOTE(review): each peek across the same '\0' byte records another warning, so
// Warnings can contain many duplicates for a single byte - confirm this is intended.
// NOTE(review): skipped '\0' bytes are not discounted from distance, so with a '\0'
// at Index both PeekChar(0) and PeekChar(1) resolve to Input[Index + 1] - verify callers.
int PeekChar(int distance = 0) int PeekChar(int distance = 0)
{ {
// Bounds check (length() before the change, size() after Input became a vector).
if (Index + distance >= Input.length()) if (Index + distance >= Input.size())
return EOF; return EOF;
return Input[Index + distance]; auto ch = Input[Index + distance];
if (ch == '\0')
{
ReportWarning(StringUtils::sprintf("\\0 character in file data"));
// Skip the zero byte by peeking one position further.
return PeekChar(distance + 1);
}
return ch;
} }
// ReadChar, shown as a side-by-side diff: rows holding two statements carry the
// pre-change text first and the post-change text second; single rows were added.
// Post-change behaviour: returns EOF when Index equals Input.size(); otherwise takes
// the byte at Index and advances Index by one. A '\0' byte records a warning and is
// skipped without being appended to ConsumedInput; any other byte is appended to
// ConsumedInput and returned.
// NOTE(review): skipping recurses once per consecutive '\0' byte, so a long run of
// zero bytes nests that many calls - a loop would bound the stack depth.
int ReadChar() int ReadChar()
{ {
if (Index == Input.length()) if (Index == Input.size())
return EOF; return EOF;
ConsumedInput += Input[Index]; auto ch = Input[Index++];
return uint8_t(Input[Index++]); //do not sign-extend to support UTF-8 if (ch == '\0')
{
ReportWarning(StringUtils::sprintf("\\0 character in file data"));
// Continue with the next byte; Index has already moved past the zero byte.
return ReadChar();
}
ConsumedInput += ch;
return ch;
} }
bool CheckString(const string & expected) bool CheckString(const string & expected)
@ -150,7 +170,7 @@ struct Lexer
{ {
NextChar(); NextChar();
if (LastChar == EOF) //end of file if (LastChar == EOF) //end of file
return ReportError("unexpected end of file in string literal"); return ReportError("unexpected end of file in string literal (1)");
if (LastChar == '\"') //end of string literal if (LastChar == '\"') //end of string literal
{ {
NextChar(); NextChar();
@ -160,8 +180,8 @@ struct Lexer
{ {
NextChar(); NextChar();
if (LastChar == EOF) if (LastChar == EOF)
return ReportError("unexpected end of file in string literal"); return ReportError("unexpected end of file in string literal (2)");
if (LastChar == '\\' || LastChar == '\"') if (LastChar == '\'' || LastChar == '\"' || LastChar == '?' || LastChar == '\\')
LastChar = LastChar; LastChar = LastChar;
else if (LastChar == 'a') else if (LastChar == 'a')
LastChar = '\a'; LastChar = '\a';
@ -178,7 +198,24 @@ struct Lexer
else if (LastChar == 'v') else if (LastChar == 'v')
LastChar = '\v'; LastChar = '\v';
else if (LastChar == '0') else if (LastChar == '0')
LastChar = '\0'; LastChar = '\1'; //TODO: handle this properly (vector<uint8_t>)
else if (LastChar == 'x') //\xHH
{
auto ch1 = NextChar();
auto ch2 = NextChar();
if (isxdigit(ch1) && isxdigit(ch2))
{
char byteStr[3] = "";
byteStr[0] = ch1;
byteStr[1] = ch2;
unsigned int hexData;
if (sscanf_s(byteStr, "%X", &hexData) != 1)
return ReportError(StringUtils::sprintf("sscanf_s failed for hex sequence \"\\x%c%c\" in string literal", ch1, ch2));
LastChar = hexData & 0xFF; //TODO: handle this properly (vector<uint8_t>)
}
else
return ReportError(StringUtils::sprintf("invalid hex sequence \"\\x%c%c\" in string literal", ch1, ch2));
}
else else
return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar)); return ReportError(StringUtils::sprintf("invalid escape sequence \"\\%c\" in string literal", LastChar));
} }
@ -263,7 +300,7 @@ struct Lexer
// Resets the lexer state and then reads the file named by filename into Input.
// Returns whatever the FileHelper read call returns.
// Side-by-side diff: the change swaps FileHelper::ReadAllText for FileHelper::ReadAllData.
// NOTE(review): presumably ReadAllData fills Input with the file's raw bytes and
// returns false on failure - FileHelper is not visible here, confirm.
bool ReadInputFile(const string & filename) bool ReadInputFile(const string & filename)
{ {
ResetLexerState(); ResetLexerState();
return FileHelper::ReadAllText(filename, Input); return FileHelper::ReadAllData(filename, Input);
} }
bool TestLex(function<void(const string & line)> lexEnum) bool TestLex(function<void(const string & line)> lexEnum)
@ -276,6 +313,8 @@ struct Lexer
} while (tok != tok_eof && tok != tok_error); } while (tok != tok_eof && tok != tok_error);
if (tok != tok_error && tok != tok_eof) if (tok != tok_error && tok != tok_eof)
tok = ReportError("lexer did not finish at the end of the file"); tok = ReportError("lexer did not finish at the end of the file");
for (const auto & warning : Warnings)
lexEnum("Warning: " + warning);
return tok != tok_error; return tok != tok_error;
} }
}; };
@ -339,7 +378,7 @@ bool DebugLexer(const string & filename)
int main() int main()
{ {
DebugLexer(testFiles[19]); DebugLexer(testFiles[82]);
RunLexerTests(); RunLexerTests();
system("pause"); system("pause");
return 0; return 0;

View File

@ -3,6 +3,7 @@
#include "memory.h" #include "memory.h"
#include "dynamicmem.h" #include "dynamicmem.h"
#include <windows.h> #include <windows.h>
#include <cstdint>
StringList StringUtils::Split(const String & s, char delim, std::vector<String> & elems) StringList StringUtils::Split(const String & s, char delim, std::vector<String> & elems)
{ {
@ -29,7 +30,7 @@ String StringUtils::Escape(const String & s)
String escaped = ""; String escaped = "";
for(size_t i = 0; i < s.length(); i++) for(size_t i = 0; i < s.length(); i++)
{ {
char ch = s[i]; auto ch = uint8_t(s[i]);
switch(ch) switch(ch)
{ {
case '\t': case '\t':
@ -57,7 +58,7 @@ String StringUtils::Escape(const String & s)
if(!isprint(ch)) //unknown unprintable character if(!isprint(ch)) //unknown unprintable character
{ {
char buf[16] = ""; char buf[16] = "";
sprintf_s(buf, "\\x%.2X", (unsigned char)ch); sprintf_s(buf, "\\x%02X", ch);
escaped += buf; escaped += buf;
} }
else else