1
0
Fork 0

Merge pull request #3272 from GermanAizek/fix-numa

Fixed thread count detection for multi-CPU systems with NUMA architecture
This commit is contained in:
Duncan Ogilvie 2023-11-18 19:38:30 +01:00 committed by GitHub
commit 570aaea06d
Signed by: GitHub
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 54 additions and 1 deletions

View File

@ -3,10 +3,12 @@
\brief Implements the global class.
*/
#include <windows.h>
#include "_global.h"
#include <objbase.h>
#include <shlobj.h>
#include <psapi.h>
#include <thread>
#include "DeviceNameResolver/DeviceNameResolver.h"
/**
@ -393,3 +395,53 @@ void WaitForMultipleThreadsTermination(const HANDLE* hThread, int count, DWORD t
for(int i = 0; i < count; i++)
CloseHandle(hThread[i]);
}
/**
\brief Gets the number of logical processors (hardware threads) in the system.

Supports both conventional single-CPU configurations and multi-CPU systems on
NUMA (non-uniform memory access) architecture, where
std::thread::hardware_concurrency() may report an incorrect result.
Original code from here: https://developercommunity.visualstudio.com/t/hardware-concurrency-returns-an-incorrect-result/350854

\return The sum of the set bits in the group affinity masks of every processor core
reported by GetLogicalProcessorInformationEx. When that API is unavailable, fails,
or reports no logical processors, the value of std::thread::hardware_concurrency()
is returned instead (which may be 0, so callers should clamp the result).
*/
duint GetThreadCount()
{
    // Fallback used whenever the extended query cannot provide an answer
    const duint fallbackCount = std::thread::hardware_concurrency();

    // The calling convention has to precede the '*'. Written as 'BOOL(*WINAPI name)(...)'
    // MSVC ignores the modifier (warning C4229), so 32-bit builds would call the
    // __stdcall API through a pointer with the default calling convention.
    typedef BOOL(WINAPI * GetLogicalProcessorInformationEx_t)(
        LOGICAL_PROCESSOR_RELATIONSHIP,
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX,
        PDWORD
    );

    // Resolved at runtime (once) so a kernel32.dll without this export is tolerated
    static auto p_GetLogicalProcessorInformationEx = reinterpret_cast<GetLogicalProcessorInformationEx_t>(GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "GetLogicalProcessorInformationEx"));
    if(p_GetLogicalProcessorInformationEx == nullptr)
        return fallbackCount;

    // First call: only query the required buffer size, this is expected to fail with ERROR_INSUFFICIENT_BUFFER
    DWORD length = 0;
    if(p_GetLogicalProcessorInformationEx(RelationAll, nullptr, &length) || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
        return fallbackCount;

    // Second call: retrieve the variable-sized records
    std::vector<uint8_t> buffer(length);
    if(!p_GetLogicalProcessorInformationEx(RelationAll, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &length))
        return fallbackCount;

    duint threadCount = 0;
    for(DWORD offset = 0; offset < length;)
    {
        const auto info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data() + offset);

        // A malformed record size would either never advance or run past the buffer
        if(info->Size == 0 || info->Size > length - offset)
            break;

        if(info->Relationship == RelationProcessorCore)
        {
            for(WORD group = 0; group < info->Processor.GroupCount; ++group)
            {
                // Every set bit in the affinity mask is one logical processor of this core
                for(KAFFINITY mask = info->Processor.GroupMask[group].Mask; mask != 0; mask >>= 1)
                    threadCount += mask & 1;
            }
        }

        // Records are variable-sized, Size is the distance to the next one
        offset += info->Size;
    }

    // Stay consistent with the other failure paths when nothing was counted
    return threadCount != 0 ? threadCount : fallbackCount;
}

View File

@ -65,6 +65,7 @@ bool IsWow64();
bool ResolveShortcut(HWND hwnd, const wchar_t* szShortcutPath, std::wstring & executable, std::wstring & arguments, std::wstring & workingDir);
void WaitForThreadTermination(HANDLE hThread, DWORD timeout = INFINITE);
void WaitForMultipleThreadsTermination(const HANDLE* hThread, int count, DWORD timeout = INFINITE);
duint GetThreadCount();
#ifdef _WIN64
#define ArchValue(x32value, x64value) x64value

View File

@ -77,7 +77,7 @@ duint AnalysisPass::IdealThreadCount()
if(m_InternalMaxThreads == 0)
{
// Determine the maximum hardware thread count at once
duint maximumThreads = max(std::thread::hardware_concurrency(), 1);
duint maximumThreads = max(GetThreadCount(), 1);
// Don't consume 100% of the CPU, adjust accordingly
if(maximumThreads > 1)