Fix crash with AVX-512 on 32-bit

This commit is contained in:
Duncan Ogilvie 2025-07-04 16:50:38 +02:00
parent 20d8c4a6eb
commit 8cad0a496f
3 changed files with 156 additions and 145 deletions

View File

@ -601,8 +601,8 @@ typedef struct
typedef struct typedef struct
{ {
YmmRegister_t Low; //AVX part YmmRegister_t Low; //AVX part
YmmRegister_t High; //AVX-512 part YmmRegister_t High; //AVX-512 part
} ZmmRegister_t; } ZmmRegister_t;
typedef struct typedef struct

View File

@ -1174,7 +1174,7 @@ private: //functions
{ {
auto p_RtlGetVersion = (NTSTATUS(WINAPI*)(PRTL_OSVERSIONINFOW))GetProcAddress(GetModuleHandleW(L"ntdll.dll"), "RtlGetVersion"); auto p_RtlGetVersion = (NTSTATUS(WINAPI*)(PRTL_OSVERSIONINFOW))GetProcAddress(GetModuleHandleW(L"ntdll.dll"), "RtlGetVersion");
RTL_OSVERSIONINFOW info = { sizeof(info) }; RTL_OSVERSIONINFOW info = { sizeof(info) };
if (p_RtlGetVersion && p_RtlGetVersion(&info) == 0) if(p_RtlGetVersion && p_RtlGetVersion(&info) == 0)
return info.dwBuildNumber; return info.dwBuildNumber;
else else
return 0; return 0;
@ -1184,7 +1184,7 @@ private: //functions
{ {
// https://www.vergiliusproject.com/kernels/x64/Windows%2010%20%7C%202016/1507%20Threshold%201/_KUSER_SHARED_DATA // https://www.vergiliusproject.com/kernels/x64/Windows%2010%20%7C%202016/1507%20Threshold%201/_KUSER_SHARED_DATA
auto NtBuildNumber = *(DWORD*)(0x7FFE0000 + 0x260); auto NtBuildNumber = *(DWORD*)(0x7FFE0000 + 0x260);
if (NtBuildNumber == 0) if(NtBuildNumber == 0)
{ {
// Older versions of Windows // Older versions of Windows
static DWORD NtBuildNumber7 = GetNtBuildNumberWindows7(); static DWORD NtBuildNumber7 = GetNtBuildNumberWindows7();

View File

@ -194,191 +194,202 @@ __declspec(dllexport) void TITCALL Getx87FPURegisters(x87FPURegister_t x87FPUReg
(1ui64 << (XSTATE_AVX512_ZMM))) (1ui64 << (XSTATE_AVX512_ZMM)))
#endif #endif
static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) { static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
// Fall back to using AVX and ignore the rest {
TITAN_ENGINE_CONTEXT_t Avx; // Fall back to using AVX and ignore the rest
memset(&Avx, 0, sizeof(Avx)); TITAN_ENGINE_CONTEXT_t Avx;
for (int i = 0; i < _countof(Avx.YmmRegisters); i++) { memset(&Avx, 0, sizeof(Avx));
Avx.YmmRegisters[i] = titcontext->ZmmRegisters[i].Low; for(int i = 0; i < _countof(Avx.YmmRegisters); i++)
} {
return SetAVXContext(hActiveThread, &Avx); Avx.YmmRegisters[i] = titcontext->ZmmRegisters[i].Low;
}
return SetAVXContext(hActiveThread, &Avx);
} }
__declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) __declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{ {
if (InitXState() == false) if(InitXState() == false)
return false; return false;
DWORD64 FeatureMask = _GetEnabledXStateFeatures(); DWORD64 FeatureMask = _GetEnabledXStateFeatures();
if ((FeatureMask & XSTATE_MASK_AVX512) == 0) if((FeatureMask & XSTATE_MASK_AVX512) == 0)
return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext); return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
DWORD ContextSize = 0; DWORD ContextSize = 0;
BOOL Success = _InitializeContext(NULL, BOOL Success = _InitializeContext(NULL,
CONTEXT_ALL | CONTEXT_XSTATE, CONTEXT_ALL | CONTEXT_XSTATE,
NULL, NULL,
&ContextSize); &ContextSize);
if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER)) if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
return false; return false;
std::vector<uint8_t> dataBuffer(ContextSize); std::vector<uint8_t> dataBuffer(ContextSize);
PCONTEXT Context; PCONTEXT Context;
Success = _InitializeContext(dataBuffer.data(), Success = _InitializeContext(dataBuffer.data(),
CONTEXT_ALL | CONTEXT_XSTATE, CONTEXT_ALL | CONTEXT_XSTATE,
&Context, &Context,
&ContextSize); &ContextSize);
if (Success == FALSE) if(Success == FALSE)
return false; return false;
if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE) if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext); return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
if (GetThreadContext(hActiveThread, Context) == FALSE) if(GetThreadContext(hActiveThread, Context) == FALSE)
return false; return false;
if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE) if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
return false; return false;
DWORD FeatureLengthSse; DWORD FeatureLengthSse;
DWORD FeatureLengthAvx; DWORD FeatureLengthAvx;
DWORD FeatureLengthAvx512_KMASK; DWORD FeatureLengthAvx512_KMASK;
DWORD FeatureLengthAvx512_ZMM_H; DWORD FeatureLengthAvx512_ZMM_H;
DWORD FeatureLengthAvx512_ZMM; DWORD FeatureLengthAvx512_ZMM;
XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse); XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx); XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK); ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM); ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H); YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
if (Sse != NULL) //If the feature is unsupported by the processor it will return NULL if(Sse != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Sse[i] = titcontext->ZmmRegisters[i].Low.Low; Sse[i] = titcontext->ZmmRegisters[i].Low.Low;
} }
if (Avx != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Avx[i] = titcontext->ZmmRegisters[i].Low.High; Avx[i] = titcontext->ZmmRegisters[i].Low.High;
} }
if (Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High; Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High;
} }
if (Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL #ifdef _WIN64
{ if(Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++) {
Avx512_ZMM[i] = titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)]; for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
} Avx512_ZMM[i] = titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)];
}
#endif // _WIN64
if (Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++)
Avx512_KMASK[i] = titcontext->Opmask[i]; Avx512_KMASK[i] = titcontext->Opmask[i];
} }
return (SetThreadContext(hActiveThread, Context) == TRUE); return (SetThreadContext(hActiveThread, Context) == TRUE);
} }
static bool GetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) static bool GetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{ {
// Fall back to using AVX and fill the rest with 0 // Fall back to using AVX and fill the rest with 0
TITAN_ENGINE_CONTEXT_t Avx; TITAN_ENGINE_CONTEXT_t Avx;
memset(titcontext, 0, sizeof(*titcontext)); memset(titcontext, 0, sizeof(*titcontext));
if (GetAVXContext(hActiveThread, &Avx)) { if(GetAVXContext(hActiveThread, &Avx))
for (int i = 0; i < _countof(Avx.YmmRegisters); i++) {
titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i]; for(int i = 0; i < _countof(Avx.YmmRegisters); i++)
return true; titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i];
} return true;
else { }
return false; else
} {
return false;
}
} }
__declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) __declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{ {
if (InitXState() == false) if(InitXState() == false)
return false; return false;
DWORD64 FeatureMask = _GetEnabledXStateFeatures(); DWORD64 FeatureMask = _GetEnabledXStateFeatures();
if ((FeatureMask & XSTATE_MASK_AVX512) == 0) //XSTATE_MASK_AVX512 if((FeatureMask & XSTATE_MASK_AVX512) == 0) //XSTATE_MASK_AVX512
return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext); return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
DWORD ContextSize = 0; DWORD ContextSize = 0;
BOOL Success = _InitializeContext(NULL, BOOL Success = _InitializeContext(NULL,
CONTEXT_ALL | CONTEXT_XSTATE, CONTEXT_ALL | CONTEXT_XSTATE,
NULL, NULL,
&ContextSize); &ContextSize);
if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER)) if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
return false; return false;
std::vector<uint8_t> dataBuffer(ContextSize); std::vector<uint8_t> dataBuffer(ContextSize);
PCONTEXT Context; PCONTEXT Context;
Success = _InitializeContext(dataBuffer.data(), Success = _InitializeContext(dataBuffer.data(),
CONTEXT_ALL | CONTEXT_XSTATE, CONTEXT_ALL | CONTEXT_XSTATE,
&Context, &Context,
&ContextSize); &ContextSize);
if (Success == FALSE) if(Success == FALSE)
return false; return false;
if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE) if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext); return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
if (GetThreadContext(hActiveThread, Context) == FALSE) if(GetThreadContext(hActiveThread, Context) == FALSE)
return false; return false;
if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE) if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
return false; return false;
DWORD FeatureLengthSse; // References:
DWORD FeatureLengthAvx; // - https://github.com/rnpnr/raddebugger/blob/14860ad71da7d5cce7106180bd4e3feefd30e5d0/src/demon/win32/demon_core_win32.c#L826
DWORD FeatureLengthAvx512_KMASK; // - https://github.com/jdpatdiscord/ExceptionHandler/blob/f845854fcbe9ee48f141260e81f39eca37db5e26/ExceptionHandler.cpp#L382
DWORD FeatureLengthAvx512_ZMM_H; DWORD FeatureLengthSse;
DWORD FeatureLengthAvx512_ZMM; DWORD FeatureLengthAvx;
XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse); DWORD FeatureLengthAvx512_KMASK;
XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx); DWORD FeatureLengthAvx512_ZMM_H;
ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK); DWORD FeatureLengthAvx512_ZMM;
ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM); XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H); XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
if (Sse != NULL) //If the feature is unsupported by the processor it will return NULL if(Sse != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].Low.Low = Sse[i]; titcontext->ZmmRegisters[i].Low.Low = Sse[i];
} }
if (Avx != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].Low.High = Avx[i]; titcontext->ZmmRegisters[i].Low.High = Avx[i];
} }
if (Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i]; titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i];
} }
if (Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL #ifdef _WIN64
{ if(Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++) {
titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)] = Avx512_ZMM[i]; for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
} titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)] = Avx512_ZMM[i];
}
#endif // _WIN64
if (Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL if(Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL
{ {
for (size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++) for(size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++)
titcontext->Opmask[i] = Avx512_KMASK[i]; titcontext->Opmask[i] = Avx512_KMASK[i];
} }
return true; return true;
} }
//PE //PE