Merge pull request #3272 from GermanAizek/fix-numa
Fixed thread count detection for multi-CPU systems with NUMA architecture
This commit is contained in:
		
						commit
						570aaea06d
					
				|  | @ -3,10 +3,12 @@ | |||
| \brief Implements the global class. | ||||
| */ | ||||
| 
 | ||||
| #include <windows.h> | ||||
| #include "_global.h" | ||||
| #include <objbase.h> | ||||
| #include <shlobj.h> | ||||
| #include <psapi.h> | ||||
| #include <thread> | ||||
| #include "DeviceNameResolver/DeviceNameResolver.h" | ||||
| 
 | ||||
| /**
 | ||||
|  | @ -393,3 +395,53 @@ void WaitForMultipleThreadsTermination(const HANDLE* hThread, int count, DWORD t | |||
|     for(int i = 0; i < count; i++) | ||||
|         CloseHandle(hThread[i]); | ||||
| } | ||||
| 
 | ||||
| // This implementation supports both conventional single-cpu PC configurations
 | ||||
| // and multi-cpu system on NUMA (Non-uniform_memory_access) architecture
 | ||||
| // Original code from here: https://developercommunity.visualstudio.com/t/hardware-concurrency-returns-an-incorrect-result/350854
 | ||||
| duint GetThreadCount() | ||||
| { | ||||
|     duint threadCount = std::thread::hardware_concurrency(); | ||||
| 
 | ||||
|     typedef BOOL(*WINAPI GetLogicalProcessorInformationEx_t)( | ||||
|         LOGICAL_PROCESSOR_RELATIONSHIP, | ||||
|         PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, | ||||
|         PDWORD | ||||
|     ); | ||||
| 
 | ||||
|     static auto p_GetLogicalProcessorInformationEx = (GetLogicalProcessorInformationEx_t)GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "GetLogicalProcessorInformationEx"); | ||||
|     if(p_GetLogicalProcessorInformationEx == nullptr) | ||||
|     { | ||||
|         return threadCount; | ||||
|     } | ||||
| 
 | ||||
|     DWORD length = 0; | ||||
|     if(p_GetLogicalProcessorInformationEx(RelationAll, nullptr, &length) || GetLastError() != ERROR_INSUFFICIENT_BUFFER) | ||||
|     { | ||||
|         return threadCount; | ||||
|     } | ||||
| 
 | ||||
|     std::vector<uint8_t> buffer(length); | ||||
|     if(!p_GetLogicalProcessorInformationEx(RelationAll, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(), &length)) | ||||
|     { | ||||
|         return threadCount; | ||||
|     } | ||||
| 
 | ||||
|     threadCount = 0; | ||||
|     for(DWORD offset = 0; offset < length;) | ||||
|     { | ||||
|         auto info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(buffer.data() + offset); | ||||
|         if(info->Relationship == RelationProcessorCore) | ||||
|         { | ||||
|             for(WORD group = 0; group < info->Processor.GroupCount; ++group) | ||||
|             { | ||||
|                 for(KAFFINITY mask = info->Processor.GroupMask[group].Mask; mask != 0; mask >>= 1) | ||||
|                 { | ||||
|                     threadCount += mask & 1; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         offset += info->Size; | ||||
|     } | ||||
|     return threadCount; | ||||
| } | ||||
|  | @ -65,6 +65,7 @@ bool IsWow64(); | |||
| bool ResolveShortcut(HWND hwnd, const wchar_t* szShortcutPath, std::wstring & executable, std::wstring & arguments, std::wstring & workingDir); | ||||
| void WaitForThreadTermination(HANDLE hThread, DWORD timeout = INFINITE); | ||||
| void WaitForMultipleThreadsTermination(const HANDLE* hThread, int count, DWORD timeout = INFINITE); | ||||
| duint GetThreadCount(); | ||||
| 
 | ||||
| #ifdef _WIN64 | ||||
| #define ArchValue(x32value, x64value) x64value | ||||
|  |  | |||
|  | @ -77,7 +77,7 @@ duint AnalysisPass::IdealThreadCount() | |||
|     if(m_InternalMaxThreads == 0) | ||||
|     { | ||||
|         // Determine the maximum hardware thread count at once
 | ||||
|         duint maximumThreads = max(std::thread::hardware_concurrency(), 1); | ||||
|         duint maximumThreads = max(GetThreadCount(), 1); | ||||
| 
 | ||||
|         // Don't consume 100% of the CPU, adjust accordingly
 | ||||
|         if(maximumThreads > 1) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue