Merge pull request #3272 from GermanAizek/fix-numa
Fixed get count threads for multi-cpu system with NUMA architecture
This commit is contained in:
commit
570aaea06d
|
@ -3,10 +3,12 @@
|
|||
\brief Implements the global class.
|
||||
*/
|
||||
|
||||
#include <windows.h>
|
||||
#include "_global.h"
|
||||
#include <objbase.h>
|
||||
#include <shlobj.h>
|
||||
#include <psapi.h>
|
||||
#include <thread>
|
||||
#include "DeviceNameResolver/DeviceNameResolver.h"
|
||||
|
||||
/**
|
||||
|
@ -393,3 +395,53 @@ void WaitForMultipleThreadsTermination(const HANDLE* hThread, int count, DWORD t
|
|||
for(int i = 0; i < count; i++)
|
||||
CloseHandle(hThread[i]);
|
||||
}
|
||||
|
||||
// This implementation supports both conventional single-cpu PC configurations
|
||||
// and multi-cpu system on NUMA (Non-uniform_memory_access) architecture
|
||||
// Original code from here: https://developercommunity.visualstudio.com/t/hardware-concurrency-returns-an-incorrect-result/350854
|
||||
duint GetThreadCount()
|
||||
{
|
||||
duint threadCount = std::thread::hardware_concurrency();
|
||||
|
||||
typedef BOOL(*WINAPI GetLogicalProcessorInformationEx_t)(
|
||||
LOGICAL_PROCESSOR_RELATIONSHIP,
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX,
|
||||
PDWORD
|
||||
);
|
||||
|
||||
static auto p_GetLogicalProcessorInformationEx = (GetLogicalProcessorInformationEx_t)GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "GetLogicalProcessorInformationEx");
|
||||
if(p_GetLogicalProcessorInformationEx == nullptr)
|
||||
{
|
||||
return threadCount;
|
||||
}
|
||||
|
||||
DWORD length = 0;
|
||||
if(p_GetLogicalProcessorInformationEx(RelationAll, nullptr, &length) || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
|
||||
{
|
||||
return threadCount;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> buffer(length);
|
||||
if(!p_GetLogicalProcessorInformationEx(RelationAll, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(), &length))
|
||||
{
|
||||
return threadCount;
|
||||
}
|
||||
|
||||
threadCount = 0;
|
||||
for(DWORD offset = 0; offset < length;)
|
||||
{
|
||||
auto info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(buffer.data() + offset);
|
||||
if(info->Relationship == RelationProcessorCore)
|
||||
{
|
||||
for(WORD group = 0; group < info->Processor.GroupCount; ++group)
|
||||
{
|
||||
for(KAFFINITY mask = info->Processor.GroupMask[group].Mask; mask != 0; mask >>= 1)
|
||||
{
|
||||
threadCount += mask & 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
offset += info->Size;
|
||||
}
|
||||
return threadCount;
|
||||
}
|
|
@ -65,6 +65,7 @@ bool IsWow64();
|
|||
bool ResolveShortcut(HWND hwnd, const wchar_t* szShortcutPath, std::wstring & executable, std::wstring & arguments, std::wstring & workingDir);
|
||||
void WaitForThreadTermination(HANDLE hThread, DWORD timeout = INFINITE);
|
||||
void WaitForMultipleThreadsTermination(const HANDLE* hThread, int count, DWORD timeout = INFINITE);
|
||||
duint GetThreadCount();
|
||||
|
||||
#ifdef _WIN64
|
||||
#define ArchValue(x32value, x64value) x64value
|
||||
|
|
|
@ -77,7 +77,7 @@ duint AnalysisPass::IdealThreadCount()
|
|||
if(m_InternalMaxThreads == 0)
|
||||
{
|
||||
// Determine the maximum hardware thread count at once
|
||||
duint maximumThreads = max(std::thread::hardware_concurrency(), 1);
|
||||
duint maximumThreads = max(GetThreadCount(), 1);
|
||||
|
||||
// Don't consume 100% of the CPU, adjust accordingly
|
||||
if(maximumThreads > 1)
|
||||
|
|
Loading…
Reference in New Issue