mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
On linux use sysfs instead of lscpu
Use sysfs (https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node) to determine the processor-to-NUMA-node mapping. This avoids problems on some machines with a high core count, where lscpu was showing high CPU utilization. Closes https://github.com/official-stockfish/Stockfish/pull/5315 No functional change.
This commit is contained in:
parent
86694b5914
commit
c8375c2fbd
3 changed files with 107 additions and 84 deletions
13
src/misc.cpp
13
src/misc.cpp
|
@ -42,12 +42,14 @@ using AdjustTokenPrivileges_t =
|
|||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <iterator>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string_view>
|
||||
|
@ -603,6 +605,17 @@ size_t str_to_size_t(const std::string& s) {
|
|||
return static_cast<size_t>(value);
|
||||
}
|
||||
|
||||
// Loads the whole file at `path` as raw bytes (the stream is opened in binary mode).
// Yields std::nullopt when the file cannot be opened.
std::optional<std::string> read_file_to_string(const std::string& path) {
    std::ifstream input(path, std::ios_base::binary);

    if (input.fail())
        return std::nullopt;

    // A default-constructed istreambuf_iterator is the end-of-stream sentinel.
    const std::istreambuf_iterator<char> first(input);
    const std::istreambuf_iterator<char> last;

    return std::string(first, last);
}
|
||||
|
||||
// Removes, in place, every character of `s` that std::isspace classifies as whitespace.
void remove_whitespace(std::string& s) {
    // std::isspace has undefined behavior when given a negative value other than EOF,
    // and plain char may be signed. Taking the character as unsigned char makes bytes
    // >= 0x80 safe to classify.
    s.erase(std::remove_if(s.begin(), s.end(),
                           [](unsigned char c) { return std::isspace(c) != 0; }),
            s.end());
}
|
||||
|
||||
std::string CommandLine::get_binary_directory(std::string argv0) {
|
||||
std::string pathSeparator;
|
||||
|
||||
|
|
24
src/misc.h
24
src/misc.h
|
@ -88,21 +88,12 @@ struct PipeDeleter {
|
|||
}
|
||||
};
|
||||
|
||||
inline std::optional<std::string> get_system_command_output(const std::string& command) {
|
||||
std::unique_ptr<FILE, PipeDeleter> pipe(popen(command.c_str(), "r"));
|
||||
if (!pipe)
|
||||
return std::nullopt;
|
||||
|
||||
std::string result;
|
||||
char buffer[1024];
|
||||
while (fgets(buffer, sizeof(buffer), pipe.get()) != nullptr)
|
||||
result += buffer;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Reads the file as bytes.
|
||||
// Returns std::nullopt if the file does not exist.
|
||||
std::optional<std::string> read_file_to_string(const std::string& path);
|
||||
|
||||
void dbg_hit_on(bool cond, int slot = 0);
|
||||
void dbg_mean_of(int64_t value, int slot = 0);
|
||||
void dbg_stdev_of(int64_t value, int slot = 0);
|
||||
|
@ -118,9 +109,12 @@ inline TimePoint now() {
|
|||
}
|
||||
|
||||
inline std::vector<std::string> split(const std::string& s, const std::string& delimiter) {
|
||||
size_t begin = 0;
|
||||
std::vector<std::string> res;
|
||||
|
||||
if (s.empty())
|
||||
return res;
|
||||
|
||||
size_t begin = 0;
|
||||
for (;;)
|
||||
{
|
||||
const size_t end = s.find(delimiter, begin);
|
||||
|
@ -136,6 +130,8 @@ inline std::vector<std::string> split(const std::string& s, const std::string& d
|
|||
return res;
|
||||
}
|
||||
|
||||
void remove_whitespace(std::string& s);
|
||||
|
||||
enum SyncCout {
|
||||
IO_LOCK,
|
||||
IO_UNLOCK
|
||||
|
|
148
src/numa.h
148
src/numa.h
|
@ -33,9 +33,8 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// We support linux very well, but we explicitly do NOT support Android, partially because
|
||||
// there are potential issues with `lscpu`, `popen` availability, and partially because
|
||||
// there's no NUMA environments running Android and there probably won't be.
|
||||
// We support linux very well, but we explicitly do NOT support Android, because there are
|
||||
// no affected systems, so it is not worth maintaining.
|
||||
#if defined(__linux__) && !defined(__ANDROID__)
|
||||
#if !defined(_GNU_SOURCE)
|
||||
#define _GNU_SOURCE
|
||||
|
@ -143,7 +142,9 @@ class NumaConfig {
|
|||
}
|
||||
|
||||
// This function queries the system for the mapping of processors to NUMA nodes.
|
||||
// On Linux we utilize `lscpu` to avoid libnuma.
|
||||
// On Linux we read from standardized kernel sysfs, with a fallback to single NUMA node.
|
||||
// On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see
|
||||
// comment for Windows implementation of get_process_affinity
|
||||
static NumaConfig from_system([[maybe_unused]] bool respectProcessAffinity = true) {
|
||||
NumaConfig cfg = empty();
|
||||
|
||||
|
@ -160,48 +161,52 @@ class NumaConfig {
|
|||
|
||||
// On Linux things are straightforward, since there's no processor groups and
|
||||
// any thread can be scheduled on all processors.
|
||||
// This command produces output in the following form
|
||||
// CPU NODE
|
||||
// 0 0
|
||||
// 1 0
|
||||
// 2 1
|
||||
// 3 1
|
||||
//
|
||||
// On some systems it may use '-' to signify no NUMA node, in which case we assume it's in node 0.
|
||||
auto lscpuOpt = get_system_command_output("lscpu -e=cpu,node");
|
||||
if (lscpuOpt.has_value())
|
||||
|
||||
// We try to gather this information from the sysfs first
|
||||
// https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node
|
||||
|
||||
bool useFallback = false;
|
||||
auto fallback = [&]() {
|
||||
useFallback = true;
|
||||
cfg = empty();
|
||||
};
|
||||
|
||||
// /sys/devices/system/node/online contains information about active NUMA nodes
|
||||
auto nodeIdsStr = read_file_to_string("/sys/devices/system/node/online");
|
||||
if (!nodeIdsStr.has_value() || nodeIdsStr->empty())
|
||||
{
|
||||
|
||||
std::istringstream ss(*lscpuOpt);
|
||||
|
||||
// skip the list header
|
||||
ss.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
|
||||
|
||||
while (true)
|
||||
{
|
||||
CpuIndex c;
|
||||
NumaIndex n;
|
||||
|
||||
ss >> c;
|
||||
|
||||
if (!ss)
|
||||
break;
|
||||
|
||||
ss >> n;
|
||||
|
||||
if (!ss)
|
||||
{
|
||||
ss.clear();
|
||||
std::string dummy;
|
||||
ss >> dummy;
|
||||
n = 0;
|
||||
fallback();
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
remove_whitespace(*nodeIdsStr);
|
||||
for (size_t n : indices_from_shortened_string(*nodeIdsStr))
|
||||
{
|
||||
// /sys/devices/system/node/node.../cpulist
|
||||
std::string path =
|
||||
std::string("/sys/devices/system/node/node") + std::to_string(n) + "/cpulist";
|
||||
auto cpuIdsStr = read_file_to_string(path);
|
||||
// Now, we only bail if the file does not exist. Some nodes may be empty, that's fine.
|
||||
// An empty node still has a file that appears to have some whitespace, so we need
|
||||
// to handle that.
|
||||
if (!cpuIdsStr.has_value())
|
||||
{
|
||||
fallback();
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
remove_whitespace(*cpuIdsStr);
|
||||
for (size_t c : indices_from_shortened_string(*cpuIdsStr))
|
||||
{
|
||||
if (is_cpu_allowed(c))
|
||||
cfg.add_cpu_to_node(n, c);
|
||||
}
|
||||
}
|
||||
else
|
||||
}
|
||||
}
|
||||
|
||||
if (useFallback)
|
||||
{
|
||||
for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c)
|
||||
if (is_cpu_allowed(c))
|
||||
|
@ -309,39 +314,18 @@ class NumaConfig {
|
|||
NumaIndex n = 0;
|
||||
for (auto&& nodeStr : split(s, ":"))
|
||||
{
|
||||
bool addedAnyCpuInThisNode = false;
|
||||
|
||||
for (const std::string& cpuStr : split(nodeStr, ","))
|
||||
auto indices = indices_from_shortened_string(nodeStr);
|
||||
if (!indices.empty())
|
||||
{
|
||||
if (cpuStr.empty())
|
||||
continue;
|
||||
|
||||
auto parts = split(cpuStr, "-");
|
||||
if (parts.size() == 1)
|
||||
{
|
||||
const CpuIndex c = CpuIndex{str_to_size_t(parts[0])};
|
||||
if (!cfg.add_cpu_to_node(n, c))
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
else if (parts.size() == 2)
|
||||
{
|
||||
const CpuIndex cfirst = CpuIndex{str_to_size_t(parts[0])};
|
||||
const CpuIndex clast = CpuIndex{str_to_size_t(parts[1])};
|
||||
|
||||
if (!cfg.add_cpu_range_to_node(n, cfirst, clast))
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
else
|
||||
for (auto idx : indices)
|
||||
{
|
||||
if (!cfg.add_cpu_to_node(n, CpuIndex(idx)))
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
addedAnyCpuInThisNode = true;
|
||||
}
|
||||
|
||||
if (addedAnyCpuInThisNode)
|
||||
n += 1;
|
||||
}
|
||||
}
|
||||
|
||||
cfg.customAffinity = true;
|
||||
|
||||
|
@ -675,7 +659,6 @@ class NumaConfig {
|
|||
return true;
|
||||
}
|
||||
|
||||
|
||||
#if defined(__linux__) && !defined(__ANDROID__)
|
||||
|
||||
static std::set<CpuIndex> get_process_affinity() {
|
||||
|
@ -807,6 +790,37 @@ class NumaConfig {
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
static std::vector<size_t> indices_from_shortened_string(const std::string& s) {
|
||||
std::vector<size_t> indices;
|
||||
|
||||
if (s.empty())
|
||||
return indices;
|
||||
|
||||
for (const std::string& ss : split(s, ","))
|
||||
{
|
||||
if (ss.empty())
|
||||
continue;
|
||||
|
||||
auto parts = split(ss, "-");
|
||||
if (parts.size() == 1)
|
||||
{
|
||||
const CpuIndex c = CpuIndex{str_to_size_t(parts[0])};
|
||||
indices.emplace_back(c);
|
||||
}
|
||||
else if (parts.size() == 2)
|
||||
{
|
||||
const CpuIndex cfirst = CpuIndex{str_to_size_t(parts[0])};
|
||||
const CpuIndex clast = CpuIndex{str_to_size_t(parts[1])};
|
||||
for (size_t c = cfirst; c <= clast; ++c)
|
||||
{
|
||||
indices.emplace_back(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return indices;
|
||||
}
|
||||
};
|
||||
|
||||
class NumaReplicationContext;
|
||||
|
|
Loading…
Add table
Reference in a new issue