Fix crash with AVX-512 on 32-bit

This commit is contained in:
Duncan Ogilvie 2025-07-04 16:50:38 +02:00
parent 20d8c4a6eb
commit 8cad0a496f
3 changed files with 156 additions and 145 deletions

View File

@ -1174,7 +1174,7 @@ private: //functions
{ {
auto p_RtlGetVersion = (NTSTATUS(WINAPI*)(PRTL_OSVERSIONINFOW))GetProcAddress(GetModuleHandleW(L"ntdll.dll"), "RtlGetVersion"); auto p_RtlGetVersion = (NTSTATUS(WINAPI*)(PRTL_OSVERSIONINFOW))GetProcAddress(GetModuleHandleW(L"ntdll.dll"), "RtlGetVersion");
RTL_OSVERSIONINFOW info = { sizeof(info) }; RTL_OSVERSIONINFOW info = { sizeof(info) };
if (p_RtlGetVersion && p_RtlGetVersion(&info) == 0) if(p_RtlGetVersion && p_RtlGetVersion(&info) == 0)
return info.dwBuildNumber; return info.dwBuildNumber;
else else
return 0; return 0;
@ -1184,7 +1184,7 @@ private: //functions
{ {
// https://www.vergiliusproject.com/kernels/x64/Windows%2010%20%7C%202016/1507%20Threshold%201/_KUSER_SHARED_DATA // https://www.vergiliusproject.com/kernels/x64/Windows%2010%20%7C%202016/1507%20Threshold%201/_KUSER_SHARED_DATA
auto NtBuildNumber = *(DWORD*)(0x7FFE0000 + 0x260); auto NtBuildNumber = *(DWORD*)(0x7FFE0000 + 0x260);
if (NtBuildNumber == 0) if(NtBuildNumber == 0)
{ {
// Older versions of Windows // Older versions of Windows
static DWORD NtBuildNumber7 = GetNtBuildNumberWindows7(); static DWORD NtBuildNumber7 = GetNtBuildNumberWindows7();

View File

@ -194,11 +194,13 @@ __declspec(dllexport) void TITCALL Getx87FPURegisters(x87FPURegister_t x87FPUReg
(1ui64 << (XSTATE_AVX512_ZMM))) (1ui64 << (XSTATE_AVX512_ZMM)))
#endif #endif
static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) { static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{
// Fall back to using AVX and ignore the rest // Fall back to using AVX and ignore the rest
TITAN_ENGINE_CONTEXT_t Avx; TITAN_ENGINE_CONTEXT_t Avx;
memset(&Avx, 0, sizeof(Avx)); memset(&Avx, 0, sizeof(Avx));
for (int i = 0; i < _countof(Avx.YmmRegisters); i++) { for(int i = 0; i < _countof(Avx.YmmRegisters); i++)
{
Avx.YmmRegisters[i] = titcontext->ZmmRegisters[i].Low; Avx.YmmRegisters[i] = titcontext->ZmmRegisters[i].Low;
} }
return SetAVXContext(hActiveThread, &Avx); return SetAVXContext(hActiveThread, &Avx);
@ -206,11 +208,11 @@ static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CON
__declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) __declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{ {
if (InitXState() == false) if(InitXState() == false)
return false; return false;
DWORD64 FeatureMask = _GetEnabledXStateFeatures(); DWORD64 FeatureMask = _GetEnabledXStateFeatures();
if ((FeatureMask & XSTATE_MASK_AVX512) == 0) if((FeatureMask & XSTATE_MASK_AVX512) == 0)
return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext); return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
DWORD ContextSize = 0; DWORD ContextSize = 0;
@ -219,7 +221,7 @@ __declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_
NULL, NULL,
&ContextSize); &ContextSize);
if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER)) if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
return false; return false;
std::vector<uint8_t> dataBuffer(ContextSize); std::vector<uint8_t> dataBuffer(ContextSize);
@ -229,16 +231,16 @@ __declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_
CONTEXT_ALL | CONTEXT_XSTATE, CONTEXT_ALL | CONTEXT_XSTATE,
&Context, &Context,
&ContextSize); &ContextSize);
if (Success == FALSE) if(Success == FALSE)
return false; return false;
if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE) if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext); return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
if (GetThreadContext(hActiveThread, Context) == FALSE) if(GetThreadContext(hActiveThread, Context) == FALSE)
return false; return false;
if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE) if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
return false; return false;
DWORD FeatureLengthSse; DWORD FeatureLengthSse;
@ -249,36 +251,38 @@ __declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_
XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse); XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx); XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK); ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM); ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H); YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
if (Sse != NULL) //If the feature is unsupported by the processor it will return NULL if(Sse != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Sse[i] = titcontext->ZmmRegisters[i].Low.Low; Sse[i] = titcontext->ZmmRegisters[i].Low.Low;
} }
if (Avx != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Avx[i] = titcontext->ZmmRegisters[i].Low.High; Avx[i] = titcontext->ZmmRegisters[i].Low.High;
} }
if (Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High; Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High;
} }
if (Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL #ifdef _WIN64
if(Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
Avx512_ZMM[i] = titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)]; Avx512_ZMM[i] = titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)];
} }
#endif // _WIN64
if (Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++)
Avx512_KMASK[i] = titcontext->Opmask[i]; Avx512_KMASK[i] = titcontext->Opmask[i];
} }
@ -290,23 +294,25 @@ static bool GetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CON
// Fall back to using AVX and fill the rest with 0 // Fall back to using AVX and fill the rest with 0
TITAN_ENGINE_CONTEXT_t Avx; TITAN_ENGINE_CONTEXT_t Avx;
memset(titcontext, 0, sizeof(*titcontext)); memset(titcontext, 0, sizeof(*titcontext));
if (GetAVXContext(hActiveThread, &Avx)) { if(GetAVXContext(hActiveThread, &Avx))
for (int i = 0; i < _countof(Avx.YmmRegisters); i++) {
for(int i = 0; i < _countof(Avx.YmmRegisters); i++)
titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i]; titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i];
return true; return true;
} }
else { else
{
return false; return false;
} }
} }
__declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) __declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{ {
if (InitXState() == false) if(InitXState() == false)
return false; return false;
DWORD64 FeatureMask = _GetEnabledXStateFeatures(); DWORD64 FeatureMask = _GetEnabledXStateFeatures();
if ((FeatureMask & XSTATE_MASK_AVX512) == 0) //XSTATE_MASK_AVX512 if((FeatureMask & XSTATE_MASK_AVX512) == 0) //XSTATE_MASK_AVX512
return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext); return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
DWORD ContextSize = 0; DWORD ContextSize = 0;
@ -315,7 +321,7 @@ __declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_
NULL, NULL,
&ContextSize); &ContextSize);
if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER)) if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
return false; return false;
std::vector<uint8_t> dataBuffer(ContextSize); std::vector<uint8_t> dataBuffer(ContextSize);
@ -325,18 +331,21 @@ __declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_
CONTEXT_ALL | CONTEXT_XSTATE, CONTEXT_ALL | CONTEXT_XSTATE,
&Context, &Context,
&ContextSize); &ContextSize);
if (Success == FALSE) if(Success == FALSE)
return false; return false;
if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE) if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext); return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
if (GetThreadContext(hActiveThread, Context) == FALSE) if(GetThreadContext(hActiveThread, Context) == FALSE)
return false; return false;
if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE) if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
return false; return false;
// References:
// - https://github.com/rnpnr/raddebugger/blob/14860ad71da7d5cce7106180bd4e3feefd30e5d0/src/demon/win32/demon_core_win32.c#L826
// - https://github.com/jdpatdiscord/ExceptionHandler/blob/f845854fcbe9ee48f141260e81f39eca37db5e26/ExceptionHandler.cpp#L382
DWORD FeatureLengthSse; DWORD FeatureLengthSse;
DWORD FeatureLengthAvx; DWORD FeatureLengthAvx;
DWORD FeatureLengthAvx512_KMASK; DWORD FeatureLengthAvx512_KMASK;
@ -345,36 +354,38 @@ __declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_
XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse); XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx); XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK); ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM); ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H); YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
if (Sse != NULL) //If the feature is unsupported by the processor it will return NULL if(Sse != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].Low.Low = Sse[i]; titcontext->ZmmRegisters[i].Low.Low = Sse[i];
} }
if (Avx != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].Low.High = Avx[i]; titcontext->ZmmRegisters[i].Low.High = Avx[i];
} }
if (Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i]; titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i];
} }
if (Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL #ifdef _WIN64
if(Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)] = Avx512_ZMM[i]; titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)] = Avx512_ZMM[i];
} }
#endif // _WIN64
if (Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++)
titcontext->Opmask[i] = Avx512_KMASK[i]; titcontext->Opmask[i] = Avx512_KMASK[i];
} }