From 8cad0a496f475b0a5e03bd37f28eb5f5d3a75d9d Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Fri, 4 Jul 2025 16:50:38 +0200 Subject: [PATCH] Fix crash with AVX-512 on 32-bit --- StaticEngine/TitanEngine.h | 4 +- TitanEngineEmulator/Emulator.h | 4 +- TitanEngineEmulator/TitanEngineEmulator.cpp | 293 ++++++++++---------- 3 files changed, 156 insertions(+), 145 deletions(-) diff --git a/StaticEngine/TitanEngine.h b/StaticEngine/TitanEngine.h index c03091a..0f3af03 100644 --- a/StaticEngine/TitanEngine.h +++ b/StaticEngine/TitanEngine.h @@ -601,8 +601,8 @@ typedef struct typedef struct { - YmmRegister_t Low; //AVX part - YmmRegister_t High; //AVX-512 part + YmmRegister_t Low; //AVX part + YmmRegister_t High; //AVX-512 part } ZmmRegister_t; typedef struct diff --git a/TitanEngineEmulator/Emulator.h b/TitanEngineEmulator/Emulator.h index fddbcca..23add79 100644 --- a/TitanEngineEmulator/Emulator.h +++ b/TitanEngineEmulator/Emulator.h @@ -1174,7 +1174,7 @@ private: //functions { auto p_RtlGetVersion = (NTSTATUS(WINAPI*)(PRTL_OSVERSIONINFOW))GetProcAddress(GetModuleHandleW(L"ntdll.dll"), "RtlGetVersion"); RTL_OSVERSIONINFOW info = { sizeof(info) }; - if (p_RtlGetVersion && p_RtlGetVersion(&info) == 0) + if(p_RtlGetVersion && p_RtlGetVersion(&info) == 0) return info.dwBuildNumber; else return 0; @@ -1184,7 +1184,7 @@ private: //functions { // https://www.vergiliusproject.com/kernels/x64/Windows%2010%20%7C%202016/1507%20Threshold%201/_KUSER_SHARED_DATA auto NtBuildNumber = *(DWORD*)(0x7FFE0000 + 0x260); - if (NtBuildNumber == 0) + if(NtBuildNumber == 0) { // Older versions of Windows static DWORD NtBuildNumber7 = GetNtBuildNumberWindows7(); diff --git a/TitanEngineEmulator/TitanEngineEmulator.cpp b/TitanEngineEmulator/TitanEngineEmulator.cpp index ad4de37..f15607f 100644 --- a/TitanEngineEmulator/TitanEngineEmulator.cpp +++ b/TitanEngineEmulator/TitanEngineEmulator.cpp @@ -194,191 +194,202 @@ __declspec(dllexport) void TITCALL Getx87FPURegisters(x87FPURegister_t x87FPUReg (1ui64 << (XSTATE_AVX512_ZMM))) #endif -static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) { - // Fall back to using AVX and ignore the rest - TITAN_ENGINE_CONTEXT_t Avx; - memset(&Avx, 0, sizeof(Avx)); - for (int i = 0; i < _countof(Avx.YmmRegisters); i++) { - Avx.YmmRegisters[i] = titcontext->ZmmRegisters[i].Low; - } - return SetAVXContext(hActiveThread, &Avx); +static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) +{ + // Fall back to using AVX and ignore the rest + TITAN_ENGINE_CONTEXT_t Avx; + memset(&Avx, 0, sizeof(Avx)); + for(int i = 0; i < _countof(Avx.YmmRegisters); i++) + { + Avx.YmmRegisters[i] = titcontext->ZmmRegisters[i].Low; + } + return SetAVXContext(hActiveThread, &Avx); } __declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) { - if (InitXState() == false) - return false; + if(InitXState() == false) + return false; - DWORD64 FeatureMask = _GetEnabledXStateFeatures(); - if ((FeatureMask & XSTATE_MASK_AVX512) == 0) - return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext); + DWORD64 FeatureMask = _GetEnabledXStateFeatures(); + if((FeatureMask & XSTATE_MASK_AVX512) == 0) + return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext); - DWORD ContextSize = 0; - BOOL Success = _InitializeContext(NULL, - CONTEXT_ALL | CONTEXT_XSTATE, - NULL, - &ContextSize); + DWORD ContextSize = 0; + BOOL Success = _InitializeContext(NULL, + CONTEXT_ALL | CONTEXT_XSTATE, + NULL, + &ContextSize); - if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER)) - return false; + if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER)) + return false; std::vector dataBuffer(ContextSize); - PCONTEXT Context; - Success = _InitializeContext(dataBuffer.data(), - CONTEXT_ALL | CONTEXT_XSTATE, - &Context, - &ContextSize); - if (Success == FALSE) - return false; + PCONTEXT Context; + Success = _InitializeContext(dataBuffer.data(), + CONTEXT_ALL | CONTEXT_XSTATE, + &Context, + &ContextSize); + if(Success == FALSE) + return false; - if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE) - return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext); + if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE) + return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext); - if (GetThreadContext(hActiveThread, Context) == FALSE) - return false; + if(GetThreadContext(hActiveThread, Context) == FALSE) + return false; - if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE) - return false; + if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE) + return false; - DWORD FeatureLengthSse; - DWORD FeatureLengthAvx; - DWORD FeatureLengthAvx512_KMASK; - DWORD FeatureLengthAvx512_ZMM_H; - DWORD FeatureLengthAvx512_ZMM; - XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse); - XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx); - ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK); - ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM); - YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H); + DWORD FeatureLengthSse; + DWORD FeatureLengthAvx; + DWORD FeatureLengthAvx512_KMASK; + DWORD FeatureLengthAvx512_ZMM_H; + DWORD FeatureLengthAvx512_ZMM; + XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse); + XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx); + ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK); + ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM); + YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H); - if (Sse != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) - Sse[i] = titcontext->ZmmRegisters[i].Low.Low; - } + if(Sse != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) + Sse[i] = titcontext->ZmmRegisters[i].Low.Low; + } - if (Avx != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) - Avx[i] = titcontext->ZmmRegisters[i].Low.High; - } + if(Avx != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) + Avx[i] = titcontext->ZmmRegisters[i].Low.High; + } - if (Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) - Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High; - } + if(Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) + Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High; + } - if (Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++) - Avx512_ZMM[i] = titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)]; - } +#ifdef _WIN64 + if(Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++) + Avx512_ZMM[i] = titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)]; + } +#endif // _WIN64 - if (Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++) - Avx512_KMASK[i] = titcontext->Opmask[i]; - } + if(Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++) + Avx512_KMASK[i] = titcontext->Opmask[i]; + } - return (SetThreadContext(hActiveThread, Context) == TRUE); + return (SetThreadContext(hActiveThread, Context) == TRUE); } static bool GetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) { - // Fall back to using AVX and fill the rest with 0 - TITAN_ENGINE_CONTEXT_t Avx; - memset(titcontext, 0, sizeof(*titcontext)); - if (GetAVXContext(hActiveThread, &Avx)) { - for (int i = 0; i < _countof(Avx.YmmRegisters); i++) - titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i]; - return true; - } - else { - return false; - } + // Fall back to using AVX and fill the rest with 0 + TITAN_ENGINE_CONTEXT_t Avx; + memset(titcontext, 0, sizeof(*titcontext)); + if(GetAVXContext(hActiveThread, &Avx)) + { + for(int i = 0; i < _countof(Avx.YmmRegisters); i++) + titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i]; + return true; + } + else + { + return false; + } } __declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) { - if (InitXState() == false) - return false; + if(InitXState() == false) + return false; - DWORD64 FeatureMask = _GetEnabledXStateFeatures(); - if ((FeatureMask & XSTATE_MASK_AVX512) == 0) //XSTATE_MASK_AVX512 - return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext); + DWORD64 FeatureMask = _GetEnabledXStateFeatures(); + if((FeatureMask & XSTATE_MASK_AVX512) == 0) //XSTATE_MASK_AVX512 + return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext); - DWORD ContextSize = 0; - BOOL Success = _InitializeContext(NULL, - CONTEXT_ALL | CONTEXT_XSTATE, - NULL, - &ContextSize); + DWORD ContextSize = 0; + BOOL Success = _InitializeContext(NULL, + CONTEXT_ALL | CONTEXT_XSTATE, + NULL, + &ContextSize); - if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER)) - return false; + if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER)) + return false; std::vector dataBuffer(ContextSize); - PCONTEXT Context; - Success = _InitializeContext(dataBuffer.data(), - CONTEXT_ALL | CONTEXT_XSTATE, - &Context, - &ContextSize); - if (Success == FALSE) - return false; + PCONTEXT Context; + Success = _InitializeContext(dataBuffer.data(), + CONTEXT_ALL | CONTEXT_XSTATE, + &Context, + &ContextSize); + if(Success == FALSE) + return false; - if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE) - return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext); + if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE) + return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext); - if (GetThreadContext(hActiveThread, Context) == FALSE) - return false; + if(GetThreadContext(hActiveThread, Context) == FALSE) + return false; - if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE) - return false; + if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE) + return false; - DWORD FeatureLengthSse; - DWORD FeatureLengthAvx; - DWORD FeatureLengthAvx512_KMASK; - DWORD FeatureLengthAvx512_ZMM_H; - DWORD FeatureLengthAvx512_ZMM; - XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse); - XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx); - ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK); - ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM); - YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H); + // References: + // - https://github.com/rnpnr/raddebugger/blob/14860ad71da7d5cce7106180bd4e3feefd30e5d0/src/demon/win32/demon_core_win32.c#L826 + // - https://github.com/jdpatdiscord/ExceptionHandler/blob/f845854fcbe9ee48f141260e81f39eca37db5e26/ExceptionHandler.cpp#L382 + DWORD FeatureLengthSse; + DWORD FeatureLengthAvx; + DWORD FeatureLengthAvx512_KMASK; + DWORD FeatureLengthAvx512_ZMM_H; + DWORD FeatureLengthAvx512_ZMM; + XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse); + XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx); + ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK); + ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM); + YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H); - if (Sse != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) - titcontext->ZmmRegisters[i].Low.Low = Sse[i]; - } + if(Sse != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) + titcontext->ZmmRegisters[i].Low.Low = Sse[i]; + } - if (Avx != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) - titcontext->ZmmRegisters[i].Low.High = Avx[i]; - } + if(Avx != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) + titcontext->ZmmRegisters[i].Low.High = Avx[i]; + } - if (Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) - titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i]; - } + if(Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) + titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i]; + } - if (Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++) - titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)] = Avx512_ZMM[i]; - } +#ifdef _WIN64 + if(Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++) + titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)] = Avx512_ZMM[i]; + } +#endif // _WIN64 - if (Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL - { - for (size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++) - titcontext->Opmask[i] = Avx512_KMASK[i]; - } + if(Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL + { + for(size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++) + titcontext->Opmask[i] = Avx512_KMASK[i]; + } - return true; + return true; } //PE