mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
On linux use sysfs instead of lscpu
Use sysfs (https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node) to determine the processor-to-NUMA-node mapping. This avoids problems on some machines with a high core count, where lscpu was showing high CPU utilization. Closes https://github.com/official-stockfish/Stockfish/pull/5315. No functional change.
This commit is contained in:
parent
86694b5914
commit
c8375c2fbd
3 changed files with 107 additions and 84 deletions
13
src/misc.cpp
13
src/misc.cpp
|
@ -42,12 +42,14 @@ using AdjustTokenPrivileges_t =
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
#include <cctype>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <iterator>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
@ -603,6 +605,17 @@ size_t str_to_size_t(const std::string& s) {
|
||||||
return static_cast<size_t>(value);
|
return static_cast<size_t>(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Loads the entire content of the file at `path` as raw bytes (the stream is
// opened in binary mode). Yields std::nullopt when the stream cannot be opened.
std::optional<std::string> read_file_to_string(const std::string& path) {
    std::ifstream input(path, std::ios_base::binary);

    if (input)
    {
        using ByteIterator = std::istreambuf_iterator<char>;

        // A default-constructed istreambuf_iterator is the end-of-stream sentinel.
        std::string contents;
        contents.assign(ByteIterator(input), ByteIterator());
        return contents;
    }

    return std::nullopt;
}
|
||||||
|
|
||||||
|
// Erases, in place, every character of `s` that std::isspace classifies as
// whitespace. All other characters keep their relative order.
void remove_whitespace(std::string& s) {
    // std::isspace has undefined behavior when its argument is neither EOF nor
    // representable as unsigned char. A plain `char` holding a byte >= 0x80 is
    // negative on platforms where char is signed, so the predicate takes its
    // argument as unsigned char before classifying it.
    const auto isWhitespace = [](unsigned char c) { return std::isspace(c) != 0; };

    s.erase(std::remove_if(s.begin(), s.end(), isWhitespace), s.end());
}
|
||||||
|
|
||||||
std::string CommandLine::get_binary_directory(std::string argv0) {
|
std::string CommandLine::get_binary_directory(std::string argv0) {
|
||||||
std::string pathSeparator;
|
std::string pathSeparator;
|
||||||
|
|
||||||
|
|
24
src/misc.h
24
src/misc.h
|
@ -88,21 +88,12 @@ struct PipeDeleter {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Runs `command` through popen in read mode and returns everything read from
// the resulting pipe as a single string.
// Returns std::nullopt if popen does not yield a pipe.
inline std::optional<std::string> get_system_command_output(const std::string& command) {
|
|
||||||
std::unique_ptr<FILE, PipeDeleter> pipe(popen(command.c_str(), "r"));
|
|
||||||
if (!pipe)
|
|
||||||
return std::nullopt;
|
|
||||||
|
|
||||||
std::string result;
|
|
||||||
char buffer[1024];
|
|
||||||
// Read the output in chunks of at most sizeof(buffer) - 1 characters until
// fgets reports end of input (or an error) by returning nullptr.
while (fgets(buffer, sizeof(buffer), pipe.get()) != nullptr)
|
|
||||||
result += buffer;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Reads the file as bytes.
|
||||||
|
// Returns std::nullopt if the file cannot be opened (e.g. it does not exist).
|
||||||
|
std::optional<std::string> read_file_to_string(const std::string& path);
|
||||||
|
|
||||||
void dbg_hit_on(bool cond, int slot = 0);
|
void dbg_hit_on(bool cond, int slot = 0);
|
||||||
void dbg_mean_of(int64_t value, int slot = 0);
|
void dbg_mean_of(int64_t value, int slot = 0);
|
||||||
void dbg_stdev_of(int64_t value, int slot = 0);
|
void dbg_stdev_of(int64_t value, int slot = 0);
|
||||||
|
@ -118,9 +109,12 @@ inline TimePoint now() {
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::vector<std::string> split(const std::string& s, const std::string& delimiter) {
|
inline std::vector<std::string> split(const std::string& s, const std::string& delimiter) {
|
||||||
size_t begin = 0;
|
|
||||||
std::vector<std::string> res;
|
std::vector<std::string> res;
|
||||||
|
|
||||||
|
if (s.empty())
|
||||||
|
return res;
|
||||||
|
|
||||||
|
size_t begin = 0;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
const size_t end = s.find(delimiter, begin);
|
const size_t end = s.find(delimiter, begin);
|
||||||
|
@ -136,6 +130,8 @@ inline std::vector<std::string> split(const std::string& s, const std::string& d
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void remove_whitespace(std::string& s);
|
||||||
|
|
||||||
enum SyncCout {
|
enum SyncCout {
|
||||||
IO_LOCK,
|
IO_LOCK,
|
||||||
IO_UNLOCK
|
IO_UNLOCK
|
||||||
|
|
154
src/numa.h
154
src/numa.h
|
@ -33,9 +33,8 @@
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
// We support linux very well, but we explicitly do NOT support Android, partially because
|
// We support Linux very well, but we explicitly do NOT support Android, because there are
|
||||||
// there are potential issues with `lscpu`, `popen` availability, and partially because
|
// no affected systems, so it is not worth maintaining.
|
||||||
// there's no NUMA environments running Android and there probably won't be.
|
|
||||||
#if defined(__linux__) && !defined(__ANDROID__)
|
#if defined(__linux__) && !defined(__ANDROID__)
|
||||||
#if !defined(_GNU_SOURCE)
|
#if !defined(_GNU_SOURCE)
|
||||||
#define _GNU_SOURCE
|
#define _GNU_SOURCE
|
||||||
|
@ -143,7 +142,9 @@ class NumaConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function queries the system for the mapping of processors to NUMA nodes.
|
// This function queries the system for the mapping of processors to NUMA nodes.
|
||||||
// On Linux we utilize `lscpu` to avoid libnuma.
|
// On Linux we read from standardized kernel sysfs, with a fallback to single NUMA node.
|
||||||
|
// On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see
|
||||||
|
// comment for Windows implementation of get_process_affinity
|
||||||
static NumaConfig from_system([[maybe_unused]] bool respectProcessAffinity = true) {
|
static NumaConfig from_system([[maybe_unused]] bool respectProcessAffinity = true) {
|
||||||
NumaConfig cfg = empty();
|
NumaConfig cfg = empty();
|
||||||
|
|
||||||
|
@ -160,48 +161,52 @@ class NumaConfig {
|
||||||
|
|
||||||
// On Linux things are straightforward, since there's no processor groups and
|
// On Linux things are straightforward, since there's no processor groups and
|
||||||
// any thread can be scheduled on all processors.
|
// any thread can be scheduled on all processors.
|
||||||
// This command produces output in the following form
|
|
||||||
// CPU NODE
|
// We try to gather this information from the sysfs first
|
||||||
// 0 0
|
// https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node
|
||||||
// 1 0
|
|
||||||
// 2 1
|
bool useFallback = false;
|
||||||
// 3 1
|
auto fallback = [&]() {
|
||||||
//
|
useFallback = true;
|
||||||
// On some systems it may use '-' to signify no NUMA node, in which case we assume it's in node 0.
|
cfg = empty();
|
||||||
auto lscpuOpt = get_system_command_output("lscpu -e=cpu,node");
|
};
|
||||||
if (lscpuOpt.has_value())
|
|
||||||
|
// /sys/devices/system/node/online contains information about active NUMA nodes
|
||||||
|
auto nodeIdsStr = read_file_to_string("/sys/devices/system/node/online");
|
||||||
|
if (!nodeIdsStr.has_value() || nodeIdsStr->empty())
|
||||||
{
|
{
|
||||||
|
fallback();
|
||||||
std::istringstream ss(*lscpuOpt);
|
|
||||||
|
|
||||||
// skip the list header
|
|
||||||
ss.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
|
|
||||||
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
CpuIndex c;
|
|
||||||
NumaIndex n;
|
|
||||||
|
|
||||||
ss >> c;
|
|
||||||
|
|
||||||
if (!ss)
|
|
||||||
break;
|
|
||||||
|
|
||||||
ss >> n;
|
|
||||||
|
|
||||||
if (!ss)
|
|
||||||
{
|
|
||||||
ss.clear();
|
|
||||||
std::string dummy;
|
|
||||||
ss >> dummy;
|
|
||||||
n = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_cpu_allowed(c))
|
|
||||||
cfg.add_cpu_to_node(n, c);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
remove_whitespace(*nodeIdsStr);
|
||||||
|
for (size_t n : indices_from_shortened_string(*nodeIdsStr))
|
||||||
|
{
|
||||||
|
// /sys/devices/system/node/node.../cpulist
|
||||||
|
std::string path =
|
||||||
|
std::string("/sys/devices/system/node/node") + std::to_string(n) + "/cpulist";
|
||||||
|
auto cpuIdsStr = read_file_to_string(path);
|
||||||
|
// Now, we only bail if the file does not exist. Some nodes may be empty, that's fine.
|
||||||
|
// An empty node still has a file that appears to have some whitespace, so we need
|
||||||
|
// to handle that.
|
||||||
|
if (!cpuIdsStr.has_value())
|
||||||
|
{
|
||||||
|
fallback();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
remove_whitespace(*cpuIdsStr);
|
||||||
|
for (size_t c : indices_from_shortened_string(*cpuIdsStr))
|
||||||
|
{
|
||||||
|
if (is_cpu_allowed(c))
|
||||||
|
cfg.add_cpu_to_node(n, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (useFallback)
|
||||||
{
|
{
|
||||||
for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c)
|
for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c)
|
||||||
if (is_cpu_allowed(c))
|
if (is_cpu_allowed(c))
|
||||||
|
@ -309,38 +314,17 @@ class NumaConfig {
|
||||||
NumaIndex n = 0;
|
NumaIndex n = 0;
|
||||||
for (auto&& nodeStr : split(s, ":"))
|
for (auto&& nodeStr : split(s, ":"))
|
||||||
{
|
{
|
||||||
bool addedAnyCpuInThisNode = false;
|
auto indices = indices_from_shortened_string(nodeStr);
|
||||||
|
if (!indices.empty())
|
||||||
for (const std::string& cpuStr : split(nodeStr, ","))
|
|
||||||
{
|
{
|
||||||
if (cpuStr.empty())
|
for (auto idx : indices)
|
||||||
continue;
|
|
||||||
|
|
||||||
auto parts = split(cpuStr, "-");
|
|
||||||
if (parts.size() == 1)
|
|
||||||
{
|
{
|
||||||
const CpuIndex c = CpuIndex{str_to_size_t(parts[0])};
|
if (!cfg.add_cpu_to_node(n, CpuIndex(idx)))
|
||||||
if (!cfg.add_cpu_to_node(n, c))
|
|
||||||
std::exit(EXIT_FAILURE);
|
std::exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
else if (parts.size() == 2)
|
|
||||||
{
|
|
||||||
const CpuIndex cfirst = CpuIndex{str_to_size_t(parts[0])};
|
|
||||||
const CpuIndex clast = CpuIndex{str_to_size_t(parts[1])};
|
|
||||||
|
|
||||||
if (!cfg.add_cpu_range_to_node(n, cfirst, clast))
|
|
||||||
std::exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
addedAnyCpuInThisNode = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (addedAnyCpuInThisNode)
|
|
||||||
n += 1;
|
n += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg.customAffinity = true;
|
cfg.customAffinity = true;
|
||||||
|
@ -675,7 +659,6 @@ class NumaConfig {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#if defined(__linux__) && !defined(__ANDROID__)
|
#if defined(__linux__) && !defined(__ANDROID__)
|
||||||
|
|
||||||
static std::set<CpuIndex> get_process_affinity() {
|
static std::set<CpuIndex> get_process_affinity() {
|
||||||
|
@ -807,6 +790,37 @@ class NumaConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static std::vector<size_t> indices_from_shortened_string(const std::string& s) {
|
||||||
|
std::vector<size_t> indices;
|
||||||
|
|
||||||
|
if (s.empty())
|
||||||
|
return indices;
|
||||||
|
|
||||||
|
for (const std::string& ss : split(s, ","))
|
||||||
|
{
|
||||||
|
if (ss.empty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
auto parts = split(ss, "-");
|
||||||
|
if (parts.size() == 1)
|
||||||
|
{
|
||||||
|
const CpuIndex c = CpuIndex{str_to_size_t(parts[0])};
|
||||||
|
indices.emplace_back(c);
|
||||||
|
}
|
||||||
|
else if (parts.size() == 2)
|
||||||
|
{
|
||||||
|
const CpuIndex cfirst = CpuIndex{str_to_size_t(parts[0])};
|
||||||
|
const CpuIndex clast = CpuIndex{str_to_size_t(parts[1])};
|
||||||
|
for (size_t c = cfirst; c <= clast; ++c)
|
||||||
|
{
|
||||||
|
indices.emplace_back(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return indices;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class NumaReplicationContext;
|
class NumaReplicationContext;
|
||||||
|
|
Loading…
Add table
Reference in a new issue