Fix crash with AVX-512 on 32-bit

This commit is contained in:
Duncan Ogilvie 2025-07-04 16:50:38 +02:00
parent 20d8c4a6eb
commit 8cad0a496f
3 changed files with 156 additions and 145 deletions

View File

@@ -601,8 +601,8 @@ typedef struct
typedef struct
{
YmmRegister_t Low; //AVX part
YmmRegister_t High; //AVX-512 part
YmmRegister_t Low; //AVX part
YmmRegister_t High; //AVX-512 part
} ZmmRegister_t;
typedef struct

View File

@@ -1174,7 +1174,7 @@ private: //functions
{
auto p_RtlGetVersion = (NTSTATUS(WINAPI*)(PRTL_OSVERSIONINFOW))GetProcAddress(GetModuleHandleW(L"ntdll.dll"), "RtlGetVersion");
RTL_OSVERSIONINFOW info = { sizeof(info) };
if (p_RtlGetVersion && p_RtlGetVersion(&info) == 0)
if(p_RtlGetVersion && p_RtlGetVersion(&info) == 0)
return info.dwBuildNumber;
else
return 0;
@@ -1184,7 +1184,7 @@ private: //functions
{
// https://www.vergiliusproject.com/kernels/x64/Windows%2010%20%7C%202016/1507%20Threshold%201/_KUSER_SHARED_DATA
auto NtBuildNumber = *(DWORD*)(0x7FFE0000 + 0x260);
if (NtBuildNumber == 0)
if(NtBuildNumber == 0)
{
// Older versions of Windows
static DWORD NtBuildNumber7 = GetNtBuildNumberWindows7();

View File

@@ -194,191 +194,202 @@ __declspec(dllexport) void TITCALL Getx87FPURegisters(x87FPURegister_t x87FPUReg
(1ui64 << (XSTATE_AVX512_ZMM)))
#endif
static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) {
// Fall back to using AVX and ignore the rest
TITAN_ENGINE_CONTEXT_t Avx;
memset(&Avx, 0, sizeof(Avx));
for (int i = 0; i < _countof(Avx.YmmRegisters); i++) {
Avx.YmmRegisters[i] = titcontext->ZmmRegisters[i].Low;
}
return SetAVXContext(hActiveThread, &Avx);
static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{
// Fall back to using AVX and ignore the rest
TITAN_ENGINE_CONTEXT_t Avx;
memset(&Avx, 0, sizeof(Avx));
for(int i = 0; i < _countof(Avx.YmmRegisters); i++)
{
Avx.YmmRegisters[i] = titcontext->ZmmRegisters[i].Low;
}
return SetAVXContext(hActiveThread, &Avx);
}
__declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{
if (InitXState() == false)
return false;
if(InitXState() == false)
return false;
DWORD64 FeatureMask = _GetEnabledXStateFeatures();
if ((FeatureMask & XSTATE_MASK_AVX512) == 0)
return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
DWORD64 FeatureMask = _GetEnabledXStateFeatures();
if((FeatureMask & XSTATE_MASK_AVX512) == 0)
return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
DWORD ContextSize = 0;
BOOL Success = _InitializeContext(NULL,
CONTEXT_ALL | CONTEXT_XSTATE,
NULL,
&ContextSize);
DWORD ContextSize = 0;
BOOL Success = _InitializeContext(NULL,
CONTEXT_ALL | CONTEXT_XSTATE,
NULL,
&ContextSize);
if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
return false;
if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
return false;
std::vector<uint8_t> dataBuffer(ContextSize);
PCONTEXT Context;
Success = _InitializeContext(dataBuffer.data(),
CONTEXT_ALL | CONTEXT_XSTATE,
&Context,
&ContextSize);
if (Success == FALSE)
return false;
PCONTEXT Context;
Success = _InitializeContext(dataBuffer.data(),
CONTEXT_ALL | CONTEXT_XSTATE,
&Context,
&ContextSize);
if(Success == FALSE)
return false;
if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
if (GetThreadContext(hActiveThread, Context) == FALSE)
return false;
if(GetThreadContext(hActiveThread, Context) == FALSE)
return false;
if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
return false;
if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
return false;
DWORD FeatureLengthSse;
DWORD FeatureLengthAvx;
DWORD FeatureLengthAvx512_KMASK;
DWORD FeatureLengthAvx512_ZMM_H;
DWORD FeatureLengthAvx512_ZMM;
XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
DWORD FeatureLengthSse;
DWORD FeatureLengthAvx;
DWORD FeatureLengthAvx512_KMASK;
DWORD FeatureLengthAvx512_ZMM_H;
DWORD FeatureLengthAvx512_ZMM;
XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
if (Sse != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Sse[i] = titcontext->ZmmRegisters[i].Low.Low;
}
if(Sse != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Sse[i] = titcontext->ZmmRegisters[i].Low.Low;
}
if (Avx != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Avx[i] = titcontext->ZmmRegisters[i].Low.High;
}
if(Avx != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Avx[i] = titcontext->ZmmRegisters[i].Low.High;
}
if (Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High;
}
if(Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High;
}
if (Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
Avx512_ZMM[i] = titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)];
}
#ifdef _WIN64
if(Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
Avx512_ZMM[i] = titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)];
}
#endif // _WIN64
if (Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++)
Avx512_KMASK[i] = titcontext->Opmask[i];
}
if(Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++)
Avx512_KMASK[i] = titcontext->Opmask[i];
}
return (SetThreadContext(hActiveThread, Context) == TRUE);
return (SetThreadContext(hActiveThread, Context) == TRUE);
}
static bool GetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{
// Fall back to using AVX and fill the rest with 0
TITAN_ENGINE_CONTEXT_t Avx;
memset(titcontext, 0, sizeof(*titcontext));
if (GetAVXContext(hActiveThread, &Avx)) {
for (int i = 0; i < _countof(Avx.YmmRegisters); i++)
titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i];
return true;
}
else {
return false;
}
// Fall back to using AVX and fill the rest with 0
TITAN_ENGINE_CONTEXT_t Avx;
memset(titcontext, 0, sizeof(*titcontext));
if(GetAVXContext(hActiveThread, &Avx))
{
for(int i = 0; i < _countof(Avx.YmmRegisters); i++)
titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i];
return true;
}
else
{
return false;
}
}
__declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{
if (InitXState() == false)
return false;
if(InitXState() == false)
return false;
DWORD64 FeatureMask = _GetEnabledXStateFeatures();
if ((FeatureMask & XSTATE_MASK_AVX512) == 0) //XSTATE_MASK_AVX512
return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
DWORD64 FeatureMask = _GetEnabledXStateFeatures();
if((FeatureMask & XSTATE_MASK_AVX512) == 0) //XSTATE_MASK_AVX512
return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
DWORD ContextSize = 0;
BOOL Success = _InitializeContext(NULL,
CONTEXT_ALL | CONTEXT_XSTATE,
NULL,
&ContextSize);
DWORD ContextSize = 0;
BOOL Success = _InitializeContext(NULL,
CONTEXT_ALL | CONTEXT_XSTATE,
NULL,
&ContextSize);
if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
return false;
if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
return false;
std::vector<uint8_t> dataBuffer(ContextSize);
PCONTEXT Context;
Success = _InitializeContext(dataBuffer.data(),
CONTEXT_ALL | CONTEXT_XSTATE,
&Context,
&ContextSize);
if (Success == FALSE)
return false;
PCONTEXT Context;
Success = _InitializeContext(dataBuffer.data(),
CONTEXT_ALL | CONTEXT_XSTATE,
&Context,
&ContextSize);
if(Success == FALSE)
return false;
if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
if (GetThreadContext(hActiveThread, Context) == FALSE)
return false;
if(GetThreadContext(hActiveThread, Context) == FALSE)
return false;
if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
return false;
if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
return false;
DWORD FeatureLengthSse;
DWORD FeatureLengthAvx;
DWORD FeatureLengthAvx512_KMASK;
DWORD FeatureLengthAvx512_ZMM_H;
DWORD FeatureLengthAvx512_ZMM;
XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t *)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
// References:
// - https://github.com/rnpnr/raddebugger/blob/14860ad71da7d5cce7106180bd4e3feefd30e5d0/src/demon/win32/demon_core_win32.c#L826
// - https://github.com/jdpatdiscord/ExceptionHandler/blob/f845854fcbe9ee48f141260e81f39eca37db5e26/ExceptionHandler.cpp#L382
DWORD FeatureLengthSse;
DWORD FeatureLengthAvx;
DWORD FeatureLengthAvx512_KMASK;
DWORD FeatureLengthAvx512_ZMM_H;
DWORD FeatureLengthAvx512_ZMM;
XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
if (Sse != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].Low.Low = Sse[i];
}
if(Sse != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].Low.Low = Sse[i];
}
if (Avx != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].Low.High = Avx[i];
}
if(Avx != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].Low.High = Avx[i];
}
if (Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i];
}
if(Avx512_ZMM_H != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i];
}
if (Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)] = Avx512_ZMM[i];
}
#ifdef _WIN64
if(Avx512_ZMM != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)] = Avx512_ZMM[i];
}
#endif // _WIN64
if (Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL
{
for (size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++)
titcontext->Opmask[i] = Avx512_KMASK[i];
}
if(Avx512_KMASK != NULL) //If the feature is unsupported by the processor it will return NULL
{
for(size_t i = 0; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG), _countof(titcontext->Opmask)); i++)
titcontext->Opmask[i] = Avx512_KMASK[i];
}
return true;
return true;
}
//PE