mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
NUMA: Fix concurrency counting for windows systems
If there is more than 1 processor group, std:🧵:hardware_concurrency should not be used. fixes #5307 closes https://github.com/official-stockfish/Stockfish/pull/5311 No functional change
This commit is contained in:
parent
02eae52833
commit
596fb4842b
2 changed files with 41 additions and 25 deletions
41
src/misc.cpp
41
src/misc.cpp
|
@ -34,16 +34,10 @@
|
|||
// the calls at compile time), try to load them at runtime. To do this we need
|
||||
// first to define the corresponding function pointers.
|
||||
extern "C" {
|
||||
using fun1_t = bool (*)(LOGICAL_PROCESSOR_RELATIONSHIP,
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX,
|
||||
PDWORD);
|
||||
using fun2_t = bool (*)(USHORT, PGROUP_AFFINITY);
|
||||
using fun3_t = bool (*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
|
||||
using fun4_t = bool (*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
|
||||
using fun5_t = WORD (*)();
|
||||
using fun6_t = bool (*)(HANDLE, DWORD, PHANDLE);
|
||||
using fun7_t = bool (*)(LPCSTR, LPCSTR, PLUID);
|
||||
using fun8_t = bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
|
||||
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
|
||||
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
|
||||
using AdjustTokenPrivileges_t =
|
||||
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -488,23 +482,25 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize
|
|||
if (!hAdvapi32)
|
||||
hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
|
||||
|
||||
auto fun6 = fun6_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
|
||||
if (!fun6)
|
||||
auto OpenProcessToken_f =
|
||||
OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
|
||||
if (!OpenProcessToken_f)
|
||||
return nullptr;
|
||||
auto fun7 = fun7_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
|
||||
if (!fun7)
|
||||
auto LookupPrivilegeValueA_f =
|
||||
LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
|
||||
if (!LookupPrivilegeValueA_f)
|
||||
return nullptr;
|
||||
auto fun8 = fun8_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
|
||||
if (!fun8)
|
||||
auto AdjustTokenPrivileges_f =
|
||||
AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
|
||||
if (!AdjustTokenPrivileges_f)
|
||||
return nullptr;
|
||||
|
||||
// We need SeLockMemoryPrivilege, so try to enable it for the process
|
||||
if (!fun6( // OpenProcessToken()
|
||||
if (!OpenProcessToken_f( // OpenProcessToken()
|
||||
GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
|
||||
return nullptr;
|
||||
|
||||
if (fun7( // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid)
|
||||
nullptr, "SeLockMemoryPrivilege", &luid))
|
||||
if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
|
||||
{
|
||||
TOKEN_PRIVILEGES tp{};
|
||||
TOKEN_PRIVILEGES prevTp{};
|
||||
|
@ -516,8 +512,8 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize
|
|||
|
||||
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
|
||||
// we still need to query GetLastError() to ensure that the privileges were actually obtained.
|
||||
if (fun8( // AdjustTokenPrivileges()
|
||||
hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen)
|
||||
if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
|
||||
&prevTpLen)
|
||||
&& GetLastError() == ERROR_SUCCESS)
|
||||
{
|
||||
// Round up size to full pages and allocate
|
||||
|
@ -526,8 +522,7 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize
|
|||
PAGE_READWRITE);
|
||||
|
||||
// Privilege no longer needed, restore previous state
|
||||
fun8( // AdjustTokenPrivileges ()
|
||||
hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
|
||||
AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
25
src/numa.h
25
src/numa.h
|
@ -61,6 +61,7 @@ using SetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT)
|
|||
// https://learn.microsoft.com/en-us/windows/win32/api/processtopologyapi/nf-processtopologyapi-setthreadgroupaffinity
|
||||
using SetThreadGroupAffinity_t = BOOL (*)(HANDLE, const GROUP_AFFINITY*, PGROUP_AFFINITY);
|
||||
|
||||
using GetActiveProcessorCount_t = DWORD (*)(WORD);
|
||||
#endif
|
||||
|
||||
#include "misc.h"
|
||||
|
@ -70,8 +71,28 @@ namespace Stockfish {
|
|||
using CpuIndex = size_t;
|
||||
using NumaIndex = size_t;
|
||||
|
||||
inline const CpuIndex SYSTEM_THREADS_NB =
|
||||
std::max<CpuIndex>(1, std::thread::hardware_concurrency());
|
||||
inline CpuIndex get_hardware_concurrency() {
|
||||
CpuIndex concurrency = std::thread::hardware_concurrency();
|
||||
|
||||
// Get all processors across all processor groups on windows, since ::hardware_concurrency
|
||||
// only returns the number of processors in the first group, because only these
|
||||
// are available to std::thread.
|
||||
#ifdef _WIN64
|
||||
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
|
||||
auto GetActiveProcessorCount_f =
|
||||
GetActiveProcessorCount_t((void (*)()) GetProcAddress(k32, "GetActiveProcessorCount"));
|
||||
|
||||
if (GetActiveProcessorCount_f != nullptr)
|
||||
{
|
||||
concurrency = GetActiveProcessorCount_f(ALL_PROCESSOR_GROUPS);
|
||||
}
|
||||
#endif
|
||||
|
||||
return concurrency;
|
||||
}
|
||||
|
||||
inline const CpuIndex SYSTEM_THREADS_NB = std::max<CpuIndex>(1, get_hardware_concurrency());
|
||||
|
||||
|
||||
// We want to abstract the purpose of storing the numa node index somewhat.
|
||||
// Whoever is using this does not need to know the specifics of the replication
|
||||
|
|
Loading…
Add table
Reference in a new issue