From b4fc237823a13cfa563475b555ffc3b4c8ea560b Mon Sep 17 00:00:00 2001
From: Duncan Ogilvie
Date: Mon, 3 Jan 2022 05:20:09 +0100
Subject: [PATCH] Only use the new string detection algorithm in certain code pages

---
Review notes (ignored by git am; not part of the commit message):
 * _exports.cpp: the computed newStringAlgorithm value was never stored in
   bNewStringAlgorithm, so the flag stayed false and the new branch of
   isasciistring() was unreachable. The value is now assigned.
 * disasm_helper.cpp: the length scan in the fallback branch dereferenced
   data[maxlen] before checking the bound. The bound is now checked first;
   the function's result is unchanged.
 * Hunk headers and the diffstat were updated for the changed line counts.

 src/dbg/_exports.cpp      |  9 +++++
 src/dbg/debugger.cpp      |  1 +
 src/dbg/debugger.h        |  1 +
 src/dbg/disasm_helper.cpp | 78 ++++++++++++++++++++++++---------------
 4 files changed, 58 insertions(+), 31 deletions(-)

diff --git a/src/dbg/_exports.cpp b/src/dbg/_exports.cpp
index 93654720..72acce37 100644
--- a/src/dbg/_exports.cpp
+++ b/src/dbg/_exports.cpp
@@ -1203,6 +1203,15 @@ extern "C" DLL_EXPORT duint _dbg_sendmessage(DBGMSG type, void* param1, void* pa
             maxSkipExceptionCount = setting;
         else
             BridgeSettingSetUint("Engine", "MaxSkipExceptionCount", maxSkipExceptionCount);
+
+        // An explicit Engine/NewStringAlgorithm setting wins; otherwise the default depends on the system ANSI code page
+        duint newStringAlgorithm = 0;
+        if(!BridgeSettingGetUint("Engine", "NewStringAlgorithm", &newStringAlgorithm))
+        {
+            auto acp = GetACP();
+            newStringAlgorithm = acp == 932 || acp == 936 || acp == 949 || acp == 950 || acp == 951 || acp == 1251;
+        }
+        bNewStringAlgorithm = newStringAlgorithm != 0;
     }
     break;
 
diff --git a/src/dbg/debugger.cpp b/src/dbg/debugger.cpp
index 65e26cd6..8c73cfd9 100644
--- a/src/dbg/debugger.cpp
+++ b/src/dbg/debugger.cpp
@@ -87,6 +87,7 @@ bool bVerboseExceptionLogging = true;
 bool bNoWow64SingleStepWorkaround = false;
 bool bTraceBrowserNeedsUpdate = false;
 bool bForceLoadSymbols = false;
+bool bNewStringAlgorithm = false;
 duint DbgEvents = 0;
 duint maxSkipExceptionCount = 0;
 HANDLE mProcHandle;
diff --git a/src/dbg/debugger.h b/src/dbg/debugger.h
index ca3a68ce..62822f01 100644
--- a/src/dbg/debugger.h
+++ b/src/dbg/debugger.h
@@ -149,6 +149,7 @@ extern bool bNoForegroundWindow;
 extern bool bVerboseExceptionLogging;
 extern bool bNoWow64SingleStepWorkaround;
 extern bool bForceLoadSymbols;
+extern bool bNewStringAlgorithm;
 extern duint maxSkipExceptionCount;
 extern HANDLE mProcHandle;
 extern HANDLE mForegroundHandle;
diff --git a/src/dbg/disasm_helper.cpp b/src/dbg/disasm_helper.cpp
index 455d40c3..05235d81 100644
--- a/src/dbg/disasm_helper.cpp
+++ b/src/dbg/disasm_helper.cpp
@@ -308,41 +308,57 @@ bool isunicodestring(const WString & data)
 // These functions are exported so that plugins can use this to detect a string, or replace with a plugin-developed string dection algorithm through hooking
 extern "C" __declspec(dllexport) bool isasciistring(const unsigned char* data, int maxlen)
 {
-    int len = 0;
-    char* safebuffer = new char[maxlen];
-    if(!safebuffer)
-        return false;
-    for(const char* p = (const char*)data; *p && len < maxlen - 1; len++, p++)
+    if(bNewStringAlgorithm)
     {
-        safebuffer[p - (const char*)data] = *p;
-    }
+        int len = 0;
+        char* safebuffer = new char[maxlen];
+        if(!safebuffer)
+            return false;
+        for(const char* p = (const char*)data; *p && len < maxlen - 1; len++, p++)
+        {
+            safebuffer[p - (const char*)data] = *p;
+        }
 
-    if(len < 2)
-    {
+        if(len < 2)
+        {
+            delete[] safebuffer;
+            return false;
+        }
+        safebuffer[len] = 0; // Mark the end of string
+        if(len >= maxlen - 1 && (maxlen % 2) == 0 && (safebuffer[maxlen - 2] & 0x80))
+            safebuffer[maxlen - 2] = 0; // Keep DBCS strings from being chopped in the middle
+
+        String data2;
+        WString wdata2;
+        // Convert to and from Unicode
+        wdata2 = StringUtils::LocalCpToUtf16(safebuffer);
         delete[] safebuffer;
-        return false;
+        if(wdata2.size() < 2)
+            return false;
+        data2 = StringUtils::Utf16ToLocalCp(wdata2);
+        if(data2.size() < 2)
+            return false;
+        // Is the data exactly representable in both ANSI and Unicode?
+        if(memcmp(data2.c_str(), data, data2.size()) != 0)
+            return false;
+        // Filter out bad chars
+        if(!isunicodestring(wdata2))
+            return false;
+        return true;
+    }
+    else
+    {
+        int len = 0;
+        // Check the bound before dereferencing so at most maxlen bytes of data are read
+        while(len < maxlen && data[len])
+            len++;
+        if(len < 2 || len + 1 >= maxlen)
+            return false;
+        for(int i = 0; i < len; i++)
+            if(!isprint(data[i]) && !isspace(data[i]))
+                return false;
+        return true;
     }
-    safebuffer[len] = 0; // Mark the end of string
-    if(len >= maxlen - 1 && (maxlen % 2) == 0 && (safebuffer[maxlen - 2] & 0x80))
-        safebuffer[maxlen - 2] = 0; // Keep DBCS strings from being chopped in the middle
-
-    String data2;
-    WString wdata2;
-    // Convert to and from Unicode
-    wdata2 = StringUtils::LocalCpToUtf16(safebuffer);
-    delete[] safebuffer;
-    if(wdata2.size() < 2)
-        return false;
-    data2 = StringUtils::Utf16ToLocalCp(wdata2);
-    if(data2.size() < 2)
-        return false;
-    // Is the data exactly representable in both ANSI and Unicode?
-    if(memcmp(data2.c_str(), data, data2.size()) != 0)
-        return false;
-    // Filter out bad chars
-    if(!isunicodestring(wdata2))
-        return false;
-    return true;
 }
 
 extern "C" __declspec(dllexport) bool isunicodestring(const unsigned char* data, int maxlen)