
Clean up comments in code

- Capitalize comments
- Reformat multi-line comments to equalize the widths of the lines
- Try to keep the width of comments around 85 characters
- Remove periods at the end of single-line comments

closes https://github.com/official-stockfish/Stockfish/pull/5469

No functional change
Stéphane Nicolet 2024-07-10 14:51:48 +02:00 committed by Joost VandeVondele
parent 98a7bb4436
commit 7e72b37e4c
12 changed files with 356 additions and 311 deletions


@ -49,7 +49,7 @@ class Engine {
Engine(std::string path = "");
// Can't be movable due to components holding backreferences to fields
// Cannot be movable due to components holding backreferences to fields
Engine(const Engine&) = delete;
Engine(Engine&&) = delete;
Engine& operator=(const Engine&) = delete;


@ -49,10 +49,12 @@
#include <iostream> // std::cerr
#include <ostream> // std::endl
#include <windows.h>
// The needed Windows API for processor groups could be missed from old Windows
// versions, so instead of calling them directly (forcing the linker to resolve
// the calls at compile time), try to load them at runtime. To do this we need
// first to define the corresponding function pointers.
extern "C" {
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
@ -64,9 +66,10 @@ using AdjustTokenPrivileges_t =
namespace Stockfish {
// Wrapper for systems where the c++17 implementation
// does not guarantee the availability of aligned_alloc(). Memory allocated with
// std_aligned_alloc() must be freed with std_aligned_free().
// Wrappers for systems where the c++17 implementation does not guarantee the
// availability of aligned_alloc(). Memory allocated with std_aligned_alloc()
// must be freed with std_aligned_free().
void* std_aligned_alloc(size_t alignment, size_t size) {
#if defined(_ISOC11_SOURCE)
return aligned_alloc(alignment, size);
@ -96,7 +99,8 @@ void std_aligned_free(void* ptr) {
#endif
}
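A minimal usage sketch of the pairing rule stated in the comment above (the 64-byte alignment and buffer size are arbitrary example choices; the declarations match the memory.h hunk shown later):

#include <cstddef>
#include <cstring>

void* std_aligned_alloc(size_t alignment, size_t size);
void  std_aligned_free(void* ptr);

void example_usage() {
    // Request 1024 bytes aligned to a 64-byte boundary
    void* buf = std_aligned_alloc(64, 1024);
    if (buf)
    {
        std::memset(buf, 0, 1024);
        std_aligned_free(buf);  // must be freed with std_aligned_free(), not free()
    }
}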
// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
// aligned_large_pages_alloc() will return suitably aligned memory,
// if possible using large pages.
#if defined(_WIN32)
@ -135,6 +139,7 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize
return nullptr;
// We need SeLockMemoryPrivilege, so try to enable it for the process
if (!OpenProcessToken_f( // OpenProcessToken()
GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
return nullptr;
@ -149,8 +154,10 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize
tp.Privileges[0].Luid = luid;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
// we still need to query GetLastError() to ensure that the privileges were actually obtained.
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
// succeeds, we still need to query GetLastError() to ensure that the privileges
// were actually obtained.
if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
&prevTpLen)
&& GetLastError() == ERROR_SUCCESS)
@ -189,9 +196,9 @@ void* aligned_large_pages_alloc(size_t allocSize) {
void* aligned_large_pages_alloc(size_t allocSize) {
#if defined(__linux__)
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
constexpr size_t alignment = 2 * 1024 * 1024; // 2MB page size assumed
#else
constexpr size_t alignment = 4096; // assumed small page size
constexpr size_t alignment = 4096; // small page size assumed
#endif
// Round up to multiples of alignment
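The rounding step mentioned in that comment is ordinary integer arithmetic; a small illustrative sketch (names are not the actual ones in the file):

#include <cstddef>

size_t round_up_to_alignment(size_t allocSize, size_t alignment) {
    // e.g. allocSize = 5'000'000 with a 2 MiB alignment rounds up to 6'291'456 (3 pages)
    return ((allocSize + alignment - 1) / alignment) * alignment;
}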
@ -206,7 +213,8 @@ void* aligned_large_pages_alloc(size_t allocSize) {
#endif
// aligned_large_pages_free() will free the previously allocated ttmem
// aligned_large_pages_free() will free the previously memory allocated
// by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr.
#if defined(_WIN32)


@ -33,13 +33,13 @@ namespace Stockfish {
void* std_aligned_alloc(size_t alignment, size_t size);
void std_aligned_free(void* ptr);
// memory aligned by page size, min alignment: 4096 bytes
void* aligned_large_pages_alloc(size_t size);
// nop if mem == nullptr
void aligned_large_pages_free(void* mem);
// frees memory which was placed there with placement new.
// works for both single objects and arrays of unknown bound
// Memory aligned by page size, min alignment: 4096 bytes
void* aligned_large_pages_alloc(size_t size);
void aligned_large_pages_free(void* mem);
// Frees memory which was placed there with placement new.
// Works for both single objects and arrays of unknown bound.
template<typename T, typename FREE_FUNC>
void memory_deleter(T* ptr, FREE_FUNC free_func) {
if (!ptr)
@ -53,15 +53,15 @@ void memory_deleter(T* ptr, FREE_FUNC free_func) {
return;
}
// frees memory which was placed there with placement new.
// works for both single objects and arrays of unknown bound
// Frees memory which was placed there with placement new.
// Works for both single objects and arrays of unknown bound.
template<typename T, typename FREE_FUNC>
void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
if (!ptr)
return;
// Move back on the pointer to where the size is allocated.
// Move back on the pointer to where the size is allocated
const size_t array_offset = std::max(sizeof(size_t), alignof(T));
char* raw_memory = reinterpret_cast<char*>(ptr) - array_offset;
@ -77,7 +77,7 @@ void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
free_func(raw_memory);
}
// Allocates memory for a single object and places it there with placement new.
// Allocates memory for a single object and places it there with placement new
template<typename T, typename ALLOC_FUNC, typename... Args>
inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC alloc_func,
Args&&... args) {
@ -86,7 +86,7 @@ inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC all
return new (raw_memory) T(std::forward<Args>(args)...);
}
// Allocates memory for an array of unknown bound and places it there with placement new.
// Allocates memory for an array of unknown bound and places it there with placement new
template<typename T, typename ALLOC_FUNC>
inline std::enable_if_t<std::is_array_v<T>, std::remove_extent_t<T>*>
memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
@ -94,7 +94,7 @@ memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));
// save the array size in the memory location
// Save the array size in the memory location
char* raw_memory =
reinterpret_cast<char*>(alloc_func(array_offset + num * sizeof(ElementType)));
ASSERT_ALIGNED(raw_memory, alignof(T));
@ -104,7 +104,8 @@ memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
for (size_t i = 0; i < num; ++i)
new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();
// Need to return the pointer at the start of the array so that the indexing in unique_ptr<T[]> works
// Need to return the pointer at the start of the array so that
// the indexing in unique_ptr<T[]> works.
return reinterpret_cast<ElementType*>(raw_memory + array_offset);
}
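A condensed sketch of the memory layout described in these hunks, combining the allocation and deletion sides (the names and the direct use of std::malloc/std::free are assumptions for the illustration; the real code takes the alloc/free functions as template parameters):

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <new>

template<typename T>
T* sketch_allocate_array(size_t num) {
    // Reserve room for the element count in front of the array, keeping T aligned
    const size_t array_offset = std::max(sizeof(size_t), alignof(T));
    char* raw = static_cast<char*>(std::malloc(array_offset + num * sizeof(T)));
    new (raw) size_t(num);                                // save the array size before the elements
    for (size_t i = 0; i < num; ++i)
        new (raw + array_offset + i * sizeof(T)) T();
    return reinterpret_cast<T*>(raw + array_offset);      // pointer usable like a plain T[]
}

template<typename T>
void sketch_delete_array(T* ptr) {
    if (!ptr)
        return;
    const size_t array_offset = std::max(sizeof(size_t), alignof(T));
    char* raw       = reinterpret_cast<char*>(ptr) - array_offset;
    const size_t num = *reinterpret_cast<size_t*>(raw);   // read back the stored size
    for (size_t i = 0; i < num; ++i)
        ptr[i].~T();
    std::free(raw);
}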


@ -112,14 +112,16 @@ class Logger {
// Returns the full name of the current Stockfish version.
// For local dev compiles we try to append the commit sha and commit date
// from git if that fails only the local compilation date is set and "nogit" is specified:
// Stockfish dev-YYYYMMDD-SHA
// or
// Stockfish dev-YYYYMMDD-nogit
//
// For local dev compiles we try to append the commit SHA and
// commit date from git. If that fails only the local compilation
// date is set and "nogit" is specified:
// Stockfish dev-YYYYMMDD-SHA
// or
// Stockfish dev-YYYYMMDD-nogit
//
// For releases (non-dev builds) we only include the version number:
// Stockfish version
// Stockfish version
std::string engine_info(bool to_uci) {
std::stringstream ss;
ss << "Stockfish " << version << std::setfill('0');
@ -131,8 +133,9 @@ std::string engine_info(bool to_uci) {
ss << stringify(GIT_DATE);
#else
constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
std::string month, day, year;
std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008"
std::string month, day, year;
std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008"
date >> month >> day >> year;
ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4)
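The month lookup above works because each abbreviation occupies four characters in the months string, so the character index divided by four gives a zero-based month; a small standalone check of that arithmetic (illustrative only):

#include <cassert>
#include <string_view>

int main() {
    constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
    // "Sep" starts at index 32, and 1 + 32 / 4 == 9, i.e. September
    assert(1 + months.find("Sep") / 4 == 9);
    assert(1 + months.find("Jan") / 4 == 1);
    assert(1 + months.find("Dec") / 4 == 12);
}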


@ -59,8 +59,8 @@ enum Stages {
QCHECK
};
// Sort moves in descending order up to and including
// a given limit. The order of moves smaller than the limit is left unspecified.
// Sort moves in descending order up to and including a given limit.
// The order of moves smaller than the limit is left unspecified.
void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p)
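To make that contract concrete, here is a hedged standalone sketch of the same idea on plain ints (a hypothetical helper, not the ExtMove version in the diff): elements with value >= limit end up sorted in descending order at the front, and the remaining elements are left in unspecified order.

void partial_insertion_sort_ints(int* begin, int* end, int limit) {
    for (int *sortedEnd = begin, *p = begin + 1; p < end; ++p)
        if (*p >= limit)
        {
            int tmp = *p, *q;
            *p      = *++sortedEnd;                 // pull the element into the sorted prefix
            for (q = sortedEnd; q != begin && *(q - 1) < tmp; --q)
                *q = *(q - 1);                      // shift smaller prefix entries to the right
            *q = tmp;
        }
}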
@ -125,8 +125,8 @@ MovePicker::MovePicker(const Position& p,
stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm));
}
// Constructor for ProbCut: we generate captures with SEE greater
// than or equal to the given threshold.
// Constructor for ProbCut: we generate captures with SEE greater than or equal
// to the given threshold.
MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) :
pos(p),
captureHistory(cph),


@ -354,13 +354,18 @@ class FeatureTransformer {
for (IndexType j = 0; j < NumOutputChunks; ++j)
{
// What we want to do is multiply inputs in a pairwise manner (after clipping), and then shift right by 9.
// Instead, we shift left by 7, and use mulhi, stripping the bottom 16 bits, effectively shifting right by 16,
// resulting in a net shift of 9 bits. We use mulhi because it maintains the sign of the multiplication (unlike mullo),
// allowing us to make use of packus to clip 2 of the inputs, resulting in a save of 2 "vec_max_16" calls.
// A special case is when we use NEON, where we shift left by 6 instead, because the instruction "vqdmulhq_s16"
// also doubles the return value after the multiplication, adding an extra shift to the left by 1, so we
// compensate by shifting less before the multiplication.
// What we want to do is multiply inputs in a pairwise manner
// (after clipping), and then shift right by 9. Instead, we
// shift left by 7, and use mulhi, stripping the bottom 16 bits,
// effectively shifting right by 16, resulting in a net shift
// of 9 bits. We use mulhi because it maintains the sign of
// the multiplication (unlike mullo), allowing us to make use
// of packus to clip 2 of the inputs, resulting in a save of 2
// "vec_max_16" calls. A special case is when we use NEON,
// where we shift left by 6 instead, because the instruction
// "vqdmulhq_s16" also doubles the return value after the
// multiplication, adding an extra shift to the left by 1, so
// we compensate by shifting less before the multiplication.
#if defined(USE_SSE2)
constexpr int shift = 7;
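A scalar illustration of the shift arithmetic described above (the sample values and the mulhi16 helper are assumptions for the example, not the SIMD intrinsics in the real code): shifting one operand left by 7 and then keeping the high 16 bits of the 16x16 product is the same as shifting the plain product right by 9.

#include <cassert>
#include <cstdint>

// High 16 bits of a signed 16x16-bit multiply, i.e. what mulhi-style instructions return
int16_t mulhi16(int16_t a, int16_t b) { return int16_t((int32_t(a) * int32_t(b)) >> 16); }

int main() {
    int16_t in0 = 53, in1 = 91;   // two already-clipped, non-negative inputs (assumed values)
    int16_t direct = int16_t((int32_t(in0) * int32_t(in1)) >> 9);   // the intended result
    int16_t trick  = mulhi16(int16_t(in0 << 7), in1);               // shift left by 7, then mulhi
    assert(direct == trick);      // << 7 followed by >> 16 nets out to >> 9
}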
@ -426,10 +431,10 @@ class FeatureTransformer {
}
// NOTE: The parameter states_to_update is an array of position states.
// All states must be sequential, that is states_to_update[i] must either be reachable
// by repeatedly applying ->previous from states_to_update[i+1].
// computed_st must be reachable by repeatedly applying ->previous on
// states_to_update[0].
// All states must be sequential, that is states_to_update[i] must
// either be reachable by repeatedly applying ->previous from
// states_to_update[i+1], and computed_st must be reachable by
// repeatedly applying ->previous on states_to_update[0].
template<Color Perspective, size_t N>
void update_accumulator_incremental(const Position& pos,
StateInfo* computed_st,
@ -446,7 +451,7 @@ class FeatureTransformer {
#ifdef VECTOR
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
// is defined in the VECTOR code below, once in each branch
// is defined in the VECTOR code below, once in each branch.
vec_t acc[NumRegs];
psqt_vec_t psqt[NumPsqtRegs];
#endif
@ -474,7 +479,8 @@ class FeatureTransformer {
StateInfo* st = computed_st;
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
// Now update the accumulators listed in states_to_update[],
// where the last element is a sentinel.
#ifdef VECTOR
if (N == 1 && (removed[0].size() == 1 || removed[0].size() == 2) && added[0].size() == 1)
@ -794,7 +800,7 @@ class FeatureTransformer {
}
// The accumulator of the refresh entry has been updated.
// Now copy its content to the actual accumulator we were refreshing
// Now copy its content to the actual accumulator we were refreshing.
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
sizeof(BiasType) * HalfDimensions);
@ -827,7 +833,7 @@ class FeatureTransformer {
if ((oldest_st->*accPtr).computed[Perspective])
{
// Only update current position accumulator to minimize work.
// Only update current position accumulator to minimize work
StateInfo* states_to_update[1] = {pos.state()};
update_accumulator_incremental<Perspective, 1>(pos, oldest_st, states_to_update);
}
@ -846,8 +852,8 @@ class FeatureTransformer {
if (next == nullptr)
return;
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
// Currently we update 2 accumulators.
// Now update the accumulators listed in states_to_update[], where
// the last element is a sentinel. Currently we update two accumulators:
// 1. for the current position
// 2. the next accumulator after the computed one
// The heuristic may change in the future.


@ -37,8 +37,8 @@
#include "memory.h"
// We support linux very well, but we explicitly do NOT support Android, because there's
// no affected systems, not worth maintaining.
// We support linux very well, but we explicitly do NOT support Android,
// because there is no affected systems, not worth maintaining.
#if defined(__linux__) && !defined(__ANDROID__)
#if !defined(_GNU_SOURCE)
#define _GNU_SOURCE
@ -81,9 +81,9 @@ using NumaIndex = size_t;
inline CpuIndex get_hardware_concurrency() {
CpuIndex concurrency = std::thread::hardware_concurrency();
// Get all processors across all processor groups on windows, since ::hardware_concurrency
// only returns the number of processors in the first group, because only these
// are available to std::thread.
// Get all processors across all processor groups on windows, since
// hardware_concurrency() only returns the number of processors in
// the first group, because only these are available to std::thread.
#ifdef _WIN64
concurrency = std::max<CpuIndex>(concurrency, GetActiveProcessorCount(ALL_PROCESSOR_GROUPS));
#endif
@ -101,7 +101,7 @@ struct WindowsAffinity {
// We also provide diagnostic for when the affinity is set to nullopt
// whether it was due to being indeterminate. If affinity is indeterminate
// it's best to assume it is not set at all, so consistent with the meaning
// it is best to assume it is not set at all, so consistent with the meaning
// of the nullopt affinity.
bool isNewDeterminate = true;
bool isOldDeterminate = true;
@ -119,23 +119,25 @@ struct WindowsAffinity {
}
// Since Windows 11 and Windows Server 2022 thread affinities can span
// processor groups and can be set as such by a new WinAPI function.
// However, we may need to force using the old API if we detect
// that the process has affinity set by the old API already and we want to override that.
// Due to the limitations of the old API we can't detect its use reliably.
// There will be cases where we detect not use but it has actually been used and vice versa.
// processor groups and can be set as such by a new WinAPI function. However,
// we may need to force using the old API if we detect that the process has
// affinity set by the old API already and we want to override that. Due to the
// limitations of the old API we cannot detect its use reliably. There will be
// cases where we detect not use but it has actually been used and vice versa.
bool likely_used_old_api() const { return oldApi.has_value() || !isOldDeterminate; }
};
inline std::pair<BOOL, std::vector<USHORT>> get_process_group_affinity() {
// GetProcessGroupAffinity requires the GroupArray argument to be
// aligned to 4 bytes instead of just 2.
static constexpr size_t GroupArrayMinimumAlignment = 4;
static_assert(GroupArrayMinimumAlignment >= alignof(USHORT));
// The function should succeed the second time, but it may fail if the group
// affinity has changed between GetProcessGroupAffinity calls.
// In such case we consider this a hard error, as we can't work with unstable affinities
// affinity has changed between GetProcessGroupAffinity calls. In such case
we consider this a hard error, as we cannot work with unstable affinities
// anyway.
static constexpr int MAX_TRIES = 2;
USHORT GroupCount = 1;
@ -165,10 +167,10 @@ inline std::pair<BOOL, std::vector<USHORT>> get_process_group_affinity() {
}
// On Windows there are two ways to set affinity, and therefore 2 ways to get it.
// These are not consistent, so we have to check both.
// In some cases it is actually not possible to determine affinity.
// For example when two different threads have affinity on different processor groups,
// set using SetThreadAffinityMask, we can't retrieve the actual affinities.
// These are not consistent, so we have to check both. In some cases it is actually
// not possible to determine affinity. For example when two different threads have
// affinity on different processor groups, set using SetThreadAffinityMask, we cannot
// retrieve the actual affinities.
// From documentation on GetProcessAffinityMask:
// > If the calling process contains threads in multiple groups,
// > the function returns zero for both affinity masks.
@ -196,8 +198,8 @@ inline WindowsAffinity get_process_affinity() {
}
else if (RequiredMaskCount > 0)
{
// If RequiredMaskCount then these affinities were never set, but it's not consistent
// so GetProcessAffinityMask may still return some affinity.
// If RequiredMaskCount then these affinities were never set, but it's
// not consistent so GetProcessAffinityMask may still return some affinity.
auto groupAffinities = std::make_unique<GROUP_AFFINITY[]>(RequiredMaskCount);
status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(),
@ -233,7 +235,7 @@ inline WindowsAffinity get_process_affinity() {
DWORD_PTR proc, sys;
status = GetProcessAffinityMask(GetCurrentProcess(), &proc, &sys);
// If proc == 0 then we can't determine affinity because it spans processor groups.
// If proc == 0 then we cannot determine affinity because it spans processor groups.
// On Windows 11 and Server 2022 it will instead
// > If, however, hHandle specifies a handle to the current process, the function
// > always uses the calling thread's primary group (which by default is the same
@ -246,10 +248,12 @@ inline WindowsAffinity get_process_affinity() {
return affinity;
}
// If SetProcessAffinityMask was never called the affinity
// must span all processor groups, but if it was called it must only span one.
// If SetProcessAffinityMask was never called the affinity must span
// all processor groups, but if it was called it must only span one.
std::vector<USHORT> groupAffinity; // We need to capture this later and capturing
// from structured bindings requires c++20.
std::tie(status, groupAffinity) = get_process_group_affinity();
if (status == 0)
{
@ -282,11 +286,12 @@ inline WindowsAffinity get_process_affinity() {
// If we got here it means that either SetProcessAffinityMask was never set
// or we're on Windows 11/Server 2022.
// Since Windows 11 and Windows Server 2022 the behaviour of GetProcessAffinityMask changed
// > If, however, hHandle specifies a handle to the current process, the function
// > always uses the calling thread's primary group (which by default is the same
// > as the process' primary group) in order to set the
// > lpProcessAffinityMask and lpSystemAffinityMask.
// Since Windows 11 and Windows Server 2022 the behaviour of
// GetProcessAffinityMask changed:
// > If, however, hHandle specifies a handle to the current process,
// > the function always uses the calling thread's primary group
// > (which by default is the same as the process' primary group)
// > in order to set the lpProcessAffinityMask and lpSystemAffinityMask.
// In which case we can actually retrieve the full affinity.
if (GetThreadSelectedCpuSetMasks_f != nullptr)
@ -300,9 +305,11 @@ inline WindowsAffinity get_process_affinity() {
const int numActiveProcessors =
GetActiveProcessorCount(static_cast<WORD>(procGroupIndex));
// We have to schedule to 2 different processors and & the affinities we get.
// Otherwise our processor choice could influence the resulting affinity.
// We assume the processor IDs within the group are filled sequentially from 0.
// We have to schedule to two different processors
// and & the affinities we get. Otherwise our processor
// choice could influence the resulting affinity.
// We assume the processor IDs within the group are
// filled sequentially from 0.
uint64_t procCombined = std::numeric_limits<uint64_t>::max();
uint64_t sysCombined = std::numeric_limits<uint64_t>::max();
@ -346,8 +353,9 @@ inline WindowsAffinity get_process_affinity() {
}
}
// We have to detect the case where the affinity was not set, or is set to all processors
// so that we correctly produce as std::nullopt result.
// We have to detect the case where the affinity was not set,
// or is set to all processors so that we correctly produce as
// std::nullopt result.
if (!isAffinityFull)
{
affinity.oldApi = std::move(cpus);
@ -369,16 +377,16 @@ inline std::set<CpuIndex> get_process_affinity() {
std::set<CpuIndex> cpus;
// For unsupported systems, or in case of a soft error, we may assume all processors
// are available for use.
// For unsupported systems, or in case of a soft error, we may assume
// all processors are available for use.
[[maybe_unused]] auto set_to_all_cpus = [&]() {
for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c)
cpus.insert(c);
};
// cpu_set_t by default holds 1024 entries. This may not be enough soon,
// but there is no easy way to determine how many threads there actually is.
// In this case we just choose a reasonable upper bound.
// but there is no easy way to determine how many threads there actually
// is. In this case we just choose a reasonable upper bound.
static constexpr CpuIndex MaxNumCpus = 1024 * 64;
cpu_set_t* mask = CPU_ALLOC(MaxNumCpus);
@ -437,19 +445,19 @@ class NumaReplicatedAccessToken {
NumaIndex n;
};
// Designed as immutable, because there is no good reason to alter an already existing config
// in a way that doesn't require recreating it completely, and it would be complex and expensive
// to maintain class invariants.
// The CPU (processor) numbers always correspond to the actual numbering used by the system.
// The NUMA node numbers MAY NOT correspond to the system's numbering of the NUMA nodes.
// In particular, empty nodes may be removed, or the user may create custom nodes.
// It is guaranteed that NUMA nodes are NOT empty, i.e. every node exposed by NumaConfig
// has at least one processor assigned.
// Designed as immutable, because there is no good reason to alter an already
// existing config in a way that doesn't require recreating it completely, and
// it would be complex and expensive to maintain class invariants.
// The CPU (processor) numbers always correspond to the actual numbering used
// by the system. The NUMA node numbers MAY NOT correspond to the system's
// numbering of the NUMA nodes. In particular, empty nodes may be removed, or
// the user may create custom nodes. It is guaranteed that NUMA nodes are NOT
// empty: every node exposed by NumaConfig has at least one processor assigned.
//
// We use startup affinities so as not to modify its own behaviour in time.
//
// Until Stockfish doesn't support exceptions all places where an exception should be thrown
// are replaced by std::exit.
// Since Stockfish doesn't support exceptions all places where an exception
// should be thrown are replaced by std::exit.
class NumaConfig {
public:
NumaConfig() :
@ -460,9 +468,9 @@ class NumaConfig {
}
// This function queries the system for the mapping of processors to NUMA nodes.
// On Linux we read from standardized kernel sysfs, with a fallback to single NUMA node.
// On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see
// comment for Windows implementation of get_process_affinity
// On Linux we read from standardized kernel sysfs, with a fallback to single NUMA
// node. On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see
// comment for Windows implementation of get_process_affinity.
static NumaConfig from_system([[maybe_unused]] bool respectProcessAffinity = true) {
NumaConfig cfg = empty();
@ -479,7 +487,6 @@ class NumaConfig {
// On Linux things are straightforward, since there's no processor groups and
// any thread can be scheduled on all processors.
// We try to gather this information from the sysfs first
// https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node
@ -504,9 +511,9 @@ class NumaConfig {
std::string path =
std::string("/sys/devices/system/node/node") + std::to_string(n) + "/cpulist";
auto cpuIdsStr = read_file_to_string(path);
// Now, we only bail if the file does not exist. Some nodes may be empty, that's fine.
// An empty node still has a file that appears to have some whitespace, so we need
// to handle that.
// Now, we only bail if the file does not exist. Some nodes may be
// empty, that's fine. An empty node still has a file that appears
// to have some whitespace, so we need to handle that.
if (!cpuIdsStr.has_value())
{
fallback();
@ -538,9 +545,10 @@ class NumaConfig {
if (respectProcessAffinity)
allowedCpus = STARTUP_PROCESSOR_AFFINITY.get_combined();
// The affinity can't be determined in all cases on Windows, but we at least guarantee
// that the number of allowed processors is >= number of processors in the affinity mask.
// In case the user is not satisfied they must set the processor numbers explicitly.
// The affinity cannot be determined in all cases on Windows,
// but we at least guarantee that the number of allowed processors
// is >= number of processors in the affinity mask. In case the user
// is not satisfied they must set the processor numbers explicitly.
auto is_cpu_allowed = [&allowedCpus](CpuIndex c) {
return !allowedCpus.has_value() || allowedCpus->count(c) == 1;
};
@ -711,21 +719,22 @@ class NumaConfig {
}
bool suggests_binding_threads(CpuIndex numThreads) const {
// If we can reasonably determine that the threads can't be contained
// If we can reasonably determine that the threads cannot be contained
// by the OS within the first NUMA node then we advise distributing
// and binding threads. When the threads are not bound we can only use
// NUMA memory replicated objects from the first node, so when the OS
// has to schedule on other nodes we lose performance.
// We also suggest binding if there's enough threads to distribute among nodes
// with minimal disparity.
// We try to ignore small nodes, in particular the empty ones.
// has to schedule on other nodes we lose performance. We also suggest
// binding if there's enough threads to distribute among nodes with minimal
// disparity. We try to ignore small nodes, in particular the empty ones.
// If the affinity set by the user does not match the affinity given by the OS
// then binding is necessary to ensure the threads are running on correct processors.
// If the affinity set by the user does not match the affinity given by
// the OS then binding is necessary to ensure the threads are running on
// correct processors.
if (customAffinity)
return true;
// We obviously can't distribute a single thread, so a single thread should never be bound.
// We obviously cannot distribute a single thread, so a single thread
// should never be bound.
if (numThreads <= 1)
return false;
@ -754,8 +763,8 @@ class NumaConfig {
if (nodes.size() == 1)
{
// special case for when there's no NUMA nodes
// doesn't buy us much, but let's keep the default path simple
// Special case for when there's no NUMA nodes. This doesn't buy us
// much, but let's keep the default path simple.
ns.resize(numThreads, NumaIndex{0});
}
else
@ -769,9 +778,11 @@ class NumaConfig {
{
float fill =
static_cast<float>(occupation[n] + 1) / static_cast<float>(nodes[n].size());
// NOTE: Do we want to perhaps fill the first available node up to 50% first before considering other nodes?
// Probably not, because it would interfere with running multiple instances. We basically shouldn't
// favor any particular node.
// NOTE: Do we want to perhaps fill the first available node
// up to 50% first before considering other nodes?
// Probably not, because it would interfere with running
// multiple instances. We basically shouldn't favor any
// particular node.
if (fill < bestNodeFill)
{
bestNode = n;
@ -816,18 +827,19 @@ class NumaConfig {
#elif defined(_WIN64)
// Requires Windows 11. No good way to set thread affinity spanning processor groups before that.
// Requires Windows 11. No good way to set thread affinity spanning
// processor groups before that.
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t(
(void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks"));
// We ALWAYS set affinity with the new API if available,
// because there's no downsides, and we forcibly keep it consistent
// with the old API should we need to use it. I.e. we always keep this as a superset
// of what we set with SetThreadGroupAffinity.
// We ALWAYS set affinity with the new API if available, because
// there's no downsides, and we forcibly keep it consistent with
// the old API should we need to use it. I.e. we always keep this
// as a superset of what we set with SetThreadGroupAffinity.
if (SetThreadSelectedCpuSetMasks_f != nullptr)
{
// Only available on Windows 11 and Windows Server 2022 onwards.
// Only available on Windows 11 and Windows Server 2022 onwards
const USHORT numProcGroups = USHORT(
((highestCpuIndex + 1) + WIN_PROCESSOR_GROUP_SIZE - 1) / WIN_PROCESSOR_GROUP_SIZE);
auto groupAffinities = std::make_unique<GROUP_AFFINITY[]>(numProcGroups);
@ -857,22 +869,25 @@ class NumaConfig {
// Sometimes we need to force the old API, but do not use it unless necessary.
if (SetThreadSelectedCpuSetMasks_f == nullptr || STARTUP_USE_OLD_AFFINITY_API)
{
// On earlier windows version (since windows 7) we can't run a single thread
// On earlier windows version (since windows 7) we cannot run a single thread
// on multiple processor groups, so we need to restrict the group.
// We assume the group of the first processor listed for this node.
// Processors from outside this group will not be assigned for this thread.
// Normally this won't be an issue because windows used to assign NUMA nodes
// such that they can't span processor groups. However, since Windows 10 Build 20348
// the behaviour changed, so there's a small window of versions between this and Windows 11
// that might exhibit problems with not all processors being utilized.
// We handle this in NumaConfig::from_system by manually splitting the nodes when
// we detect that there's no function to set affinity spanning processor nodes.
// This is required because otherwise our thread distribution code may produce
// suboptimal results.
// such that they cannot span processor groups. However, since Windows 10
// Build 20348 the behaviour changed, so there's a small window of versions
// between this and Windows 11 that might exhibit problems with not all
// processors being utilized.
//
// We handle this in NumaConfig::from_system by manually splitting the
// nodes when we detect that there is no function to set affinity spanning
// processor nodes. This is required because otherwise our thread distribution
// code may produce suboptimal results.
//
// See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
GROUP_AFFINITY affinity;
std::memset(&affinity, 0, sizeof(GROUP_AFFINITY));
// We use an ordered set so we're guaranteed to get the smallest cpu number here.
// We use an ordered set to be sure to get the smallest cpu number here.
const size_t forcedProcGroupIndex = *(nodes[n].begin()) / WIN_PROCESSOR_GROUP_SIZE;
affinity.Group = static_cast<WORD>(forcedProcGroupIndex);
for (CpuIndex c : nodes[n])
@ -894,8 +909,8 @@ class NumaConfig {
if (status == 0)
std::exit(EXIT_FAILURE);
// We yield this thread just to be sure it gets rescheduled.
// This is defensive, allowed because this code is not performance critical.
// We yield this thread just to be sure it gets rescheduled. This is
// defensive, allowed because this code is not performance critical.
SwitchToThread();
}
@ -1013,8 +1028,8 @@ class NumaConfig {
class NumaReplicationContext;
// Instances of this class are tracked by the NumaReplicationContext instance
// NumaReplicationContext informs all tracked instances whenever NUMA configuration changes.
// Instances of this class are tracked by the NumaReplicationContext instance.
// NumaReplicationContext informs all tracked instances when NUMA configuration changes.
class NumaReplicatedBase {
public:
NumaReplicatedBase(NumaReplicationContext& ctx);
@ -1034,9 +1049,9 @@ class NumaReplicatedBase {
NumaReplicationContext* context;
};
// We force boxing with a unique_ptr. If this becomes an issue due to added indirection we
// may need to add an option for a custom boxing type.
// When the NUMA config changes the value stored at the index 0 is replicated to other nodes.
// We force boxing with a unique_ptr. If this becomes an issue due to added
// indirection we may need to add an option for a custom boxing type. When the
// NUMA config changes the value stored at the index 0 is replicated to other nodes.
template<typename T>
class NumaReplicated: public NumaReplicatedBase {
public:
@ -1090,8 +1105,8 @@ class NumaReplicated: public NumaReplicatedBase {
}
void on_numa_config_changed() override {
// Use the first one as the source. It doesn't matter which one we use, because they all must
// be identical, but the first one is guaranteed to exist.
// Use the first one as the source. It doesn't matter which one we use,
// because they all must be identical, but the first one is guaranteed to exist.
auto source = std::move(instances[0]);
replicate_from(std::move(*source));
}
@ -1167,7 +1182,7 @@ class NumaReplicationContext {
private:
NumaConfig config;
// std::set uses std::less by default, which is required for pointer comparison to be defined.
// std::set uses std::less by default, which is required for pointer comparison
std::set<NumaReplicatedBase*> trackedReplicatedObjects;
};


@ -315,8 +315,8 @@ inline bool Position::capture(Move m) const {
}
// Returns true if a move is generated from the capture stage, having also
// queen promotions covered, i.e. consistency with the capture stage move generation
// is needed to avoid the generation of duplicate moves.
// queen promotions covered, i.e. consistency with the capture stage move
// generation is needed to avoid the generation of duplicate moves.
inline bool Position::capture_stage(Move m) const {
assert(m.is_ok());
return capture(m) || m.promotion_type() == QUEEN;


@ -78,7 +78,8 @@ constexpr int futility_move_count(bool improving, Depth depth) {
return improving ? (3 + depth * depth) : (3 + depth * depth) / 2;
}
// Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range
// Add correctionHistory value to raw staticEval and guarantee evaluation
// does not hit the tablebase range.
Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) {
auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index<Correction>(pos)];
v += cv * std::abs(cv) / 5073;
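One way to realize the "guarantee" part of that comment is a clamp into the non-tablebase score range; a hedged standalone sketch (the bound constants are placeholders, and the real function reads cv from the worker's correctionHistory as shown above):

#include <algorithm>
#include <cstdlib>

using Value = int;                                      // stand-in for the engine's score type
constexpr Value VALUE_TB_LOSS_IN_MAX_PLY = -30000;      // assumed placeholder bound
constexpr Value VALUE_TB_WIN_IN_MAX_PLY  =  30000;      // assumed placeholder bound

Value corrected_static_eval_sketch(Value v, int cv) {
    v += cv * std::abs(cv) / 5073;                      // correction term from the diff above
    // Keep the result strictly between the tablebase loss and win bounds
    return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
}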
@ -333,8 +334,8 @@ void Search::Worker::iterative_deepening() {
int failedHighCnt = 0;
while (true)
{
// Adjust the effective depth searched, but ensure at least one effective increment
// for every four searchAgain steps (see issue #2717).
// Adjust the effective depth searched, but ensure at least one
// effective increment for every four searchAgain steps (see issue #2717).
Depth adjustedDepth =
std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4);
rootDelta = beta - alpha;
@ -354,15 +355,15 @@ void Search::Worker::iterative_deepening() {
if (threads.stop)
break;
// When failing high/low give some update before a re-search.
// To avoid excessive output that could hang GUIs like Fritz 19, only start
// When failing high/low give some update before a re-search. To avoid
// excessive output that could hang GUIs like Fritz 19, only start
// at nodes > 10M (rather than depth N, which can be reached quickly)
if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta)
&& nodes > 10000000)
main_manager()->pv(*this, threads, tt, rootDepth);
// In case of failing low/high increase aspiration window and
// re-search, otherwise exit the loop.
// In case of failing low/high increase aspiration window and re-search,
// otherwise exit the loop.
if (bestValue <= alpha)
{
beta = (alpha + beta) / 2;
@ -390,10 +391,11 @@ void Search::Worker::iterative_deepening() {
if (mainThread
&& (threads.stop || pvIdx + 1 == multiPV || nodes > 10000000)
// A thread that aborted search can have mated-in/TB-loss PV and score
// that cannot be trusted, i.e. it can be delayed or refuted if we would have
// had time to fully search other root-moves. Thus we suppress this output and
// below pick a proven score/PV for this thread (from the previous iteration).
// A thread that aborted search can have mated-in/TB-loss PV and
// score that cannot be trusted, i.e. it can be delayed or refuted
// if we would have had time to fully search other root-moves. Thus
// we suppress this output and below pick a proven score/PV for this
// thread (from the previous iteration).
&& !(threads.abortedSearch && rootMoves[0].uciScore <= VALUE_TB_LOSS_IN_MAX_PLY))
main_manager()->pv(*this, threads, tt, rootDepth);
@ -504,6 +506,7 @@ void Search::Worker::iterative_deepening() {
skill.best ? skill.best : skill.pick_best(rootMoves, multiPV)));
}
// Reset histories, usually before a new game
void Search::Worker::clear() {
mainHistory.fill(0);
captureHistory.fill(-700);
@ -523,7 +526,7 @@ void Search::Worker::clear() {
}
// Main search function for both PV and non-PV nodes.
// Main search function for both PV and non-PV nodes
template<NodeType nodeType>
Value Search::Worker::search(
Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) {
@ -538,7 +541,7 @@ Value Search::Worker::search(
// Limit the depth if extensions made it too large
depth = std::min(depth, MAX_PLY - 1);
// Check if we have an upcoming move that draws by repetition.
// Check if we have an upcoming move that draws by repetition
if (!rootNode && alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply))
{
alpha = value_draw(this->nodes);
@ -611,7 +614,7 @@ Value Search::Worker::search(
Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE;
ss->statScore = 0;
// Step 4. Transposition table lookup.
// Step 4. Transposition table lookup
excludedMove = ss->excludedMove;
posKey = pos.key();
auto [ttHit, ttData, ttWriter] = tt.probe(posKey);
@ -676,7 +679,7 @@ Value Search::Worker::search(
Value tbValue = VALUE_TB - ss->ply;
// use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score
// Use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score
value = wdl < -drawScore ? -tbValue
: wdl > drawScore ? tbValue
: VALUE_DRAW + 2 * wdl * drawScore;
@ -771,8 +774,8 @@ Value Search::Worker::search(
opponentWorsening = ss->staticEval + (ss - 1)->staticEval > 2;
// Step 7. Razoring (~1 Elo)
// If eval is really low check with qsearch if it can exceed alpha, if it can't,
// return a fail low.
// If eval is really low, check with qsearch if we can exceed alpha. If the
// search suggests we cannot exceed alpha, return a speculative fail low.
if (eval < alpha - 494 - 290 * depth * depth)
{
value = qsearch<NonPV>(pos, ss, alpha - 1, alpha);
@ -836,27 +839,26 @@ Value Search::Worker::search(
if (PvNode && !ttData.move)
depth -= 3;
// Use qsearch if depth <= 0.
// Use qsearch if depth <= 0
if (depth <= 0)
return qsearch<PV>(pos, ss, alpha, beta);
// For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, or
// by 1 if there is a ttMove with an upper bound.
// For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove,
// or by 1 if there is a ttMove with an upper bound.
if (cutNode && depth >= 7 && (!ttData.move || ttData.bound == BOUND_UPPER))
depth -= 1 + !ttData.move;
// Step 11. ProbCut (~10 Elo)
// If we have a good enough capture (or queen promotion) and a reduced search returns a value
// much above beta, we can (almost) safely prune the previous move.
// If we have a good enough capture (or queen promotion) and a reduced search
// returns a value much above beta, we can (almost) safely prune the previous move.
probCutBeta = beta + 184 - 53 * improving;
if (
!PvNode && depth > 3
&& std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY
// If value from transposition table is lower than probCutBeta, don't attempt probCut
// there and in further interactions with transposition table cutoff depth is set to depth - 3
// because probCut search has depth set to depth - 4 but we also do a move before it
// So effective depth is equal to depth - 3
&& !(ttData.depth >= depth - 3 && ttData.value != VALUE_NONE && ttData.value < probCutBeta))
if (!PvNode && depth > 3
&& std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY
// If value from transposition table is lower than probCutBeta, don't attempt
// probCut there and in further interactions with transposition table cutoff
// depth is set to depth - 3 because probCut search has depth set to depth - 4
// but we also do a move before it. So effective depth is equal to depth - 3.
&& !(ttData.depth >= depth - 3 && ttData.value != VALUE_NONE && ttData.value < probCutBeta))
{
assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta);
@ -870,7 +872,6 @@ Value Search::Worker::search(
if (move == excludedMove)
continue;
// Check for legality
if (!pos.legal(move))
continue;
@ -1050,18 +1051,18 @@ moves_loop: // When in check, search starts here
// We take care to not overdo to avoid search getting stuck.
if (ss->ply < thisThread->rootDepth * 2)
{
// Singular extension search (~76 Elo, ~170 nElo). If all moves but one fail
// low on a search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
// then that move is singular and should be extended. To verify this we do
// a reduced search on the position excluding the ttMove and if the result
// is lower than ttValue minus a margin, then we will extend the ttMove.
// Recursive singular search is avoided.
// Singular extension search (~76 Elo, ~170 nElo). If all moves but one
// fail low on a search of (alpha-s, beta-s), and just one fails high on
// (alpha, beta), then that move is singular and should be extended. To
// verify this we do a reduced search on the position excluding the ttMove
// and if the result is lower than ttValue minus a margin, then we will
// extend the ttMove. Recursive singular search is avoided.
// Note: the depth margin and singularBeta margin are known for having non-linear
// scaling. Their values are optimized to time controls of 180+1.8 and longer
// so changing them requires tests at these types of time controls.
// Generally, higher singularBeta (i.e closer to ttValue) and lower extension
// margins scale well.
// Note: the depth margin and singularBeta margin are known for having
// non-linear scaling. Their values are optimized to time controls of
// 180+1.8 and longer so changing them requires tests at these types of
// time controls. Generally, higher singularBeta (i.e closer to ttValue)
// and lower extension margins scale well.
if (!rootNode && move == ttData.move && !excludedMove
&& depth >= 4 - (thisThread->completedDepth > 36) + ss->ttPv
@ -1089,28 +1090,31 @@ moves_loop: // When in check, search starts here
// Multi-cut pruning
// Our ttMove is assumed to fail high based on the bound of the TT entry,
// and if after excluding the ttMove with a reduced search we fail high over the original beta,
// we assume this expected cut-node is not singular (multiple moves fail high),
// and we can prune the whole subtree by returning a softbound.
// and if after excluding the ttMove with a reduced search we fail high
// over the original beta, we assume this expected cut-node is not
// singular (multiple moves fail high), and we can prune the whole
// subtree by returning a softbound.
else if (value >= beta && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY)
return value;
// Negative extensions
// If other moves failed high over (ttValue - margin) without the ttMove on a reduced search,
// but we cannot do multi-cut because (ttValue - margin) is lower than the original beta,
// we do not know if the ttMove is singular or can do a multi-cut,
// so we reduce the ttMove in favor of other moves based on some conditions:
// If other moves failed high over (ttValue - margin) without the
// ttMove on a reduced search, but we cannot do multi-cut because
// (ttValue - margin) is lower than the original beta, we do not know
// if the ttMove is singular or can do a multi-cut, so we reduce the
// ttMove in favor of other moves based on some conditions:
// If the ttMove is assumed to fail high over current beta (~7 Elo)
else if (ttData.value >= beta)
extension = -3;
// If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo)
// If we are on a cutNode but the ttMove is not assumed to fail high
// over current beta (~1 Elo)
else if (cutNode)
extension = -2;
}
// Extension for capturing the previous moved piece (~0 Elo on STC, ~1 Elo on LTC)
// Extension for capturing the previous moved piece (~1 Elo at LTC)
else if (PvNode && move.to_sq() == prevSq
&& thisThread->captureHistory[movedPiece][move.to_sq()]
[type_of(pos.piece_on(move.to_sq()))]
@ -1136,9 +1140,9 @@ moves_loop: // When in check, search starts here
pos.do_move(move, st, givesCheck);
// These reduction adjustments have proven non-linear scaling.
// They are optimized to time controls of 180 + 1.8 and longer so
// changing them or adding conditions that are similar
// requires tests at these types of time controls.
// They are optimized to time controls of 180 + 1.8 and longer,
// so changing them or adding conditions that are similar requires
// tests at these types of time controls.
// Decrease reduction if position is or has been on the PV (~7 Elo)
if (ss->ttPv)
@ -1148,7 +1152,7 @@ moves_loop: // When in check, search starts here
if (PvNode)
r--;
// These reduction adjustments have no proven non-linear scaling.
// These reduction adjustments have no proven non-linear scaling
// Increase reduction for cut nodes (~4 Elo)
if (cutNode)
@ -1163,8 +1167,8 @@ moves_loop: // When in check, search starts here
if ((ss + 1)->cutoffCnt > 3)
r += 1 + !(PvNode || cutNode);
// For first picked move (ttMove) reduce reduction
// but never allow it to go below 0 (~3 Elo)
// For first picked move (ttMove) reduce reduction, but never allow
// reduction to go below 0 (~3 Elo)
else if (move == ttData.move)
r = std::max(0, r - 2);
@ -1190,8 +1194,8 @@ moves_loop: // When in check, search starts here
// Do a full-depth search when reduced LMR search fails high
if (value > alpha && d < newDepth)
{
// Adjust full-depth search based on LMR results - if the result
// was good enough search deeper, if it was bad enough search shallower.
// Adjust full-depth search based on LMR results - if the result was
// good enough search deeper, if it was bad enough search shallower.
const bool doDeeperSearch = value > (bestValue + 35 + 2 * newDepth); // (~1 Elo)
const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo)
@ -1237,8 +1241,8 @@ moves_loop: // When in check, search starts here
// Step 20. Check for a new best move
// Finished searching the move. If a stop occurred, the return value of
// the search cannot be trusted, and we return immediately without
// updating best move, PV and TT.
// the search cannot be trusted, and we return immediately without updating
// best move, principal variation nor transposition table.
if (threads.stop.load(std::memory_order_relaxed))
return VALUE_ZERO;
@ -1351,7 +1355,8 @@ moves_loop: // When in check, search starts here
if (!moveCount)
bestValue = excludedMove ? alpha : ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW;
// If there is a move that produces search value greater than alpha we update the stats of searched moves
// If there is a move that produces search value greater than alpha,
// we update the stats of searched moves.
else if (bestMove)
update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, quietCount,
capturesSearched, captureCount, depth);
@ -1385,8 +1390,8 @@ moves_loop: // When in check, search starts here
if (bestValue <= alpha)
ss->ttPv = ss->ttPv || ((ss - 1)->ttPv && depth > 3);
// Write gathered information in transposition table
// Static evaluation is saved as it was before correction history
// Write gathered information in transposition table. Note that the
// static evaluation is saved as it was before correction history.
if (!excludedMove && !(rootNode && thisThread->pvIdx))
ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv,
bestValue >= beta ? BOUND_LOWER
@ -1410,12 +1415,12 @@ moves_loop: // When in check, search starts here
}
// Quiescence search function, which is called by the main search function with zero depth, or
// recursively with further decreasing depth per call. With depth <= 0, we "should" be using
// static eval only, but tactical moves may confuse the static eval. To fight this horizon effect,
// we implement this qsearch of tactical moves only.
// See https://www.chessprogramming.org/Horizon_Effect and https://www.chessprogramming.org/Quiescence_Search
// (~155 Elo)
// Quiescence search function, which is called by the main search function with
// depth zero, or recursively with further decreasing depth. With depth <= 0, we
// "should" be using static eval only, but tactical moves may confuse the static eval.
// To fight this horizon effect, we implement this qsearch of tactical moves (~155 Elo).
// See https://www.chessprogramming.org/Horizon_Effect
// and https://www.chessprogramming.org/Quiescence_Search
template<NodeType nodeType>
Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) {
@ -1426,7 +1431,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
assert(PvNode || (alpha == beta - 1));
assert(depth <= 0);
// Check if we have an upcoming move that draws by repetition. (~1 Elo)
// Check if we have an upcoming move that draws by repetition (~1 Elo)
if (alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply))
{
alpha = value_draw(this->nodes);
@ -1469,9 +1474,10 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
assert(0 <= ss->ply && ss->ply < MAX_PLY);
// Note that unlike regular search, which stores the literal depth into the TT, from QS we
// only store the current movegen stage as "depth". If in check, we search all evasions and
// thus store DEPTH_QS_CHECKS. (Evasions may be quiet, and _CHECKS includes quiets.)
// Note that unlike regular search, which stores the literal depth into the
// transposition table, from qsearch we only store the current movegen stage
// as "depth". If in check, we search all evasions and thus store DEPTH_QS_CHECKS.
// Evasions may be quiet, and _CHECKS includes quiets.
Depth qsTtDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NORMAL;
// Step 3. Transposition table lookup
@ -1512,7 +1518,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
}
else
{
// In case of null move search, use previous static eval with a different sign
// In case of null move search, use previous static eval with opposite sign
unadjustedStaticEval =
(ss - 1)->currentMove != Move::null()
? evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us])
@ -1542,21 +1548,20 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory,
(ss - 2)->continuationHistory};
// Initialize a MovePicker object for the current position, and prepare to search the moves.
// We presently use two stages of qs movegen, first captures+checks, then captures only.
// (When in check, we simply search all evasions.)
// (Presently, having the checks stage is worth only 1 Elo, and may be removable in the near future,
// which would result in only a single stage of QS movegen.)
// Initialize a MovePicker object for the current position, and prepare to search
// the moves. We presently use two stages of move generator in quiescence search:
// first captures+checks, then captures only (but when in check, we simply search
// all evasions).
Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE;
MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory,
contHist, &thisThread->pawnHistory);
// Step 5. Loop through all pseudo-legal moves until no moves remain or a beta cutoff occurs.
// Step 5. Loop through all pseudo-legal moves until no moves remain or a beta
// cutoff occurs.
while ((move = mp.next_move()) != Move::none())
{
assert(move.is_ok());
// Check for legality
if (!pos.legal(move))
continue;
@ -1577,24 +1582,24 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
Value futilityValue = futilityBase + PieceValue[pos.piece_on(move.to_sq())];
// If static eval + value of piece we are going to capture is much lower
// than alpha we can prune this move. (~2 Elo)
// If static eval + value of piece we are going to capture is
// much lower than alpha, we can prune this move. (~2 Elo)
if (futilityValue <= alpha)
{
bestValue = std::max(bestValue, futilityValue);
continue;
}
// If static eval is much lower than alpha and move is not winning material
// we can prune this move. (~2 Elo)
// If static eval is much lower than alpha and move is
// not winning material, we can prune this move. (~2 Elo)
if (futilityBase <= alpha && !pos.see_ge(move, 1))
{
bestValue = std::max(bestValue, futilityBase);
continue;
}
// If static exchange evaluation is much worse than what is needed to not
// fall below alpha we can prune this move.
// If static exchange evaluation is much worse than what
// is needed to not fall below alpha, we can prune this move.
if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 4))
{
bestValue = alpha;
@ -1654,8 +1659,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
}
// Step 9. Check for mate
// All legal moves have been searched. A special case: if we're in check
// and no legal moves were found, it is checkmate.
// All legal moves have been searched. A special case: if we are
// in check and no legal moves were found, it is checkmate.
if (ss->inCheck && bestValue == -VALUE_INFINITE)
{
assert(!MoveList<LEGAL>(pos).size());
@ -1665,8 +1670,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && bestValue >= beta)
bestValue = (3 * bestValue + beta) / 4;
// Save gathered info in transposition table
// Static evaluation is saved as it was before adjustment by correction history
// Save gathered info in transposition table. The static evaluation
// is saved as it was before adjustment by correction history.
ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), pvHit,
bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, qsTtDepth, bestMove,
unadjustedStaticEval, tt.generation());
@ -1697,8 +1702,8 @@ TimePoint Search::Worker::elapsed_time() const { return main_manager()->tm.elaps
namespace {
// Adjusts a mate or TB score from "plies to mate from the root"
// to "plies to mate from the current position". Standard scores are unchanged.
// Adjusts a mate or TB score from "plies to mate from the root" to
// "plies to mate from the current position". Standard scores are unchanged.
// The function is called before storing a value in the transposition table.
Value value_to_tt(Value v, int ply) {
@ -1707,11 +1712,11 @@ Value value_to_tt(Value v, int ply) {
}
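For readers unfamiliar with the convention, a minimal sketch of the ply adjustment described above; it is an illustration assuming the usual VALUE_TB_WIN_IN_MAX_PLY / VALUE_TB_LOSS_IN_MAX_PLY thresholds from types.h, not code taken from this diff:
Value value_to_tt_sketch(Value v, int ply) {
    // A root-relative win becomes larger (closer to VALUE_MATE) when expressed
    // relative to the current position, a loss becomes smaller; normal scores
    // pass through unchanged
    if (v >= VALUE_TB_WIN_IN_MAX_PLY)
        return v + ply;
    if (v <= VALUE_TB_LOSS_IN_MAX_PLY)
        return v - ply;
    return v;
}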
// Inverse of value_to_tt(): it adjusts a mate or TB score
// from the transposition table (which refers to the plies to mate/be mated from
// current position) to "plies to mate/be mated (TB win/loss) from the root".
// However, to avoid potentially false mate or TB scores related to the 50 moves rule
// and the graph history interaction, we return the highest non-TB score instead.
// Inverse of value_to_tt(): it adjusts a mate or TB score from the transposition
// table (which refers to the plies to mate/be mated from the current position) to
// "plies to mate/be mated (TB win/loss) from the root". However, to avoid
// potentially false mate or TB scores related to the 50-move rule and the
// graph history interaction, we return the highest non-TB score instead.
Value value_from_tt(Value v, int ply, int r50c) {
if (v == VALUE_NONE)
@ -1810,8 +1815,8 @@ void update_all_stats(const Position& pos,
}
// Updates histories of the move pairs formed
// by moves at ply -1, -2, -3, -4, and -6 with current move.
// Updates histories of the move pairs formed by moves
// at ply -1, -2, -3, -4, and -6 with current move.
void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) {
bonus = bonus * 52 / 64;
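As a self-contained illustration of the update loop this comment describes (the continuationHistory pointers on the stack and the '<<' stats-update operator are assumed from the surrounding codebase; the per-ply weights are deliberately omitted):
void update_continuation_histories_sketch(Stack* ss, Piece pc, Square to, int bonus) {
    // Walk the plies -1, -2, -3, -4 and -6 and reward the corresponding
    // piece-to entries; null or none moves are skipped
    for (int i : {1, 2, 3, 4, 6})
        if (((ss - i)->currentMove).is_ok())
            (*(ss - i)->continuationHistory)[pc][to] << bonus;
}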
@ -1859,8 +1864,8 @@ void update_quiet_stats(
}
// When playing with strength handicap, choose the best move among a set of RootMoves
// using a statistical rule dependent on 'level'. Idea by Heinz van Saanen.
// When playing with strength handicap, choose the best move among a set of
// RootMoves using a statistical rule dependent on 'level'. Idea by Heinz van Saanen.
Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) {
static PRNG rng(now()); // PRNG sequence should be non-deterministic
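One hypothetical way such a level-dependent statistical rule can look (the constants and the exact formula here are invented for illustration and are not the engine's actual rule; rootMoves are assumed sorted by score, best first):
Move pick_best_sketch(const RootMoves& rootMoves, size_t multiPV, int level, PRNG& rng) {
    // level is assumed to be in [0, 20]; lower levels get a larger handicap
    int  weakness      = 120 - 2 * level;
    int  bestPerturbed = -VALUE_INFINITE;
    Move best          = Move::none();
    for (size_t i = 0; i < multiPV; ++i)
    {
        // Push weaker moves up by a deterministic term plus some noise,
        // then keep the move with the highest perturbed score
        int push = weakness * (rootMoves[0].score - rootMoves[i].score) / 128
                 + int(rng.rand<unsigned>() % unsigned(weakness));
        if (rootMoves[i].score + push > bestPerturbed)
        {
            bestPerturbed = rootMoves[i].score + push;
            best          = rootMoves[i].pv[0];
        }
    }
    return best;
}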
@ -1891,8 +1896,8 @@ Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) {
}
// Used to print debug info and, more importantly,
// to detect when we are out of available time and thus stop the search.
// Used to print debug info and, more importantly, to detect
// when we are out of available time and thus stop the search.
void SearchManager::check_time(Search::Worker& worker) {
if (--callsCnt > 0)
return;
@ -1926,8 +1931,9 @@ void SearchManager::check_time(Search::Worker& worker) {
}
// Used to correct and extend PVs for moves that have a TB (but not a mate) score.
// Keeps the search based PV for as long as it is verified to maintain the game outcome, truncates afterwards.
// Finally, extends to mate the PV, providing a possible continuation (but not a proven mating line).
// Keeps the search-based PV for as long as it is verified to maintain the game
// outcome, and truncates it afterwards. Finally, extends the PV to mate, providing
// a possible continuation (but not a proven mating line).
void syzygy_extend_pv(const OptionsMap& options,
const Search::LimitsType& limits,
Position& pos,
@ -1937,7 +1943,7 @@ void syzygy_extend_pv(const OptionsMap& options,
auto t_start = std::chrono::steady_clock::now();
int moveOverhead = int(options["Move Overhead"]);
// Do not use more than moveOverhead / 2 time, if time management is active.
// Do not use more than moveOverhead / 2 time, if time management is active
auto time_abort = [&t_start, &moveOverhead, &limits]() -> bool {
auto t_end = std::chrono::steady_clock::now();
return limits.use_time_management()
@ -1968,7 +1974,7 @@ void syzygy_extend_pv(const OptionsMap& options,
auto& st = sts.emplace_back();
pos.do_move(pvMove, st);
// don't allow for repetitions or drawing moves along the PV in TB regime.
// Do not allow for repetitions or drawing moves along the PV in TB regime
if (config.rootInTB && pos.is_draw(ply))
{
pos.undo_move(pvMove);
@ -1976,17 +1982,18 @@ void syzygy_extend_pv(const OptionsMap& options,
break;
}
// Full PV shown will thus be validated and end TB.
// If we can't validate the full PV in time, we don't show it.
// Full PV shown will thus be validated and end in TB.
// If we cannot validate the full PV in time, we do not show it.
if (config.rootInTB && time_abort())
break;
}
// resize the PV to the correct part
// Resize the PV to the correct part
rootMove.pv.resize(ply);
// Step 2, now extend the PV to mate, as if the user explores syzygy-tables.info using
// top ranked moves (minimal DTZ), which gives optimal mates only for simple endgames e.g. KRvK
// Step 2: now extend the PV to mate, as if the user explored syzygy-tables.info
// using top-ranked moves (minimal DTZ), which gives optimal mates only for simple
// endgames, e.g. KRvK.
while (!pos.is_draw(0))
{
if (time_abort())
@ -1998,8 +2005,8 @@ void syzygy_extend_pv(const OptionsMap& options,
auto& rm = legalMoves.emplace_back(m);
StateInfo tmpSI;
pos.do_move(m, tmpSI);
// Give a score of each move to break DTZ ties
// restricting opponent mobility, but not giving the opponent a capture.
// Give each move a score to break DTZ ties, preferring moves that restrict
// the opponent's mobility without giving the opponent a capture.
for (const auto& mOpp : MoveList<LEGAL>(pos))
rm.tbRank -= pos.capture(mOpp) ? 100 : 1;
pos.undo_move(m);
@ -2009,16 +2016,16 @@ void syzygy_extend_pv(const OptionsMap& options,
if (legalMoves.size() == 0)
break;
// sort moves according to their above assigned rank,
// Sort moves according to their above assigned rank.
// This will break ties for moves with equal DTZ in rank_root_moves.
std::stable_sort(
legalMoves.begin(), legalMoves.end(),
[](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; });
// The winning side tries to minimize DTZ, the losing side maximizes it.
// The winning side tries to minimize DTZ, the losing side maximizes it
Tablebases::Config config = Tablebases::rank_root_moves(options, pos, legalMoves, true);
// If DTZ is not available we might not find a mate, so we bail out.
// If DTZ is not available we might not find a mate, so we bail out
if (!config.rootInTB || config.cardinality > 0)
break;
@ -2030,23 +2037,24 @@ void syzygy_extend_pv(const OptionsMap& options,
pos.do_move(pvMove, st);
}
// Finding a draw in this function is an exceptional case, that cannot happen during engine game play,
// since we have a winning score, and play correctly with TB support.
// However, it can be that a position is draw due to the 50 move rule if it has been been reached
// on the board with a non-optimal 50 move counter e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106
// which TB with dtz counter rounding cannot always correctly rank. See also
// Finding a draw in this function is an exceptional case that cannot happen
// during engine game play, since we have a winning score and play correctly
// with TB support. However, a position can be a draw due to the 50-move rule
// if it has been reached on the board with a non-optimal 50-move counter
// (e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106), which TB with DTZ counter
// rounding cannot always correctly rank. See also
// https://github.com/official-stockfish/Stockfish/issues/5175#issuecomment-2058893495
// We adjust the score to match the found PV. Note that a TB loss score can be displayed
// if the engine did not find a drawing move yet, but eventually search will figure it out.
// E.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1
// We adjust the score to match the found PV. Note that a TB loss score can be
// displayed if the engine did not find a drawing move yet, but eventually the
// search will figure it out (e.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1).
if (pos.is_draw(0))
v = VALUE_DRAW;
// Undo the PV moves.
// Undo the PV moves
for (auto it = rootMove.pv.rbegin(); it != rootMove.pv.rend(); ++it)
pos.undo_move(*it);
// Inform if we couldn't get a full extension in time.
// Inform if we could not get a full extension in time
if (time_abort())
sync_cout
<< "info string Syzygy based PV extension requires more time, increase Move Overhead as needed."
@ -2092,7 +2100,7 @@ void SearchManager::pv(Search::Worker& worker,
for (Move m : rootMoves[i].pv)
pv += UCIEngine::move(m, pos.is_chess960()) + " ";
// remove last whitespace
// Remove last whitespace
if (!pv.empty())
pv.pop_back();

View file

@ -236,8 +236,8 @@ class Worker {
public:
Worker(SharedState&, std::unique_ptr<ISearchManager>, size_t, NumaReplicatedAccessToken);
// Called at instantiation to initialize Reductions tables
// Reset histories, usually before a new game
// Called at instantiation to initialize reductions tables.
// Reset histories, usually before a new game.
void clear();
// Called when the program receives the UCI 'go' command.
@ -256,7 +256,7 @@ class Worker {
private:
void iterative_deepening();
// Main search function for both PV and non-PV nodes
// This is the main search function, for both PV and non-PV nodes
template<NodeType nodeType>
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
@ -266,8 +266,7 @@ class Worker {
Depth reduction(bool i, Depth d, int mn, int delta) const;
// Get a pointer to the search manager, only allowed to be called by the
// main thread.
// Pointer to the search manager, only allowed to be called by the main thread
SearchManager* main_manager() const {
assert(threadIdx == 0);
return static_cast<SearchManager*>(manager.get());

View file

@ -50,8 +50,8 @@ Thread::Thread(Search::SharedState& sharedState,
run_custom_job([this, &binder, &sharedState, &sm, n]() {
// Use the binder to [maybe] bind the threads to a NUMA node before doing
// the Worker allocation.
// Ideally we would also allocate the SearchManager here, but that's minor.
// the Worker allocation. Ideally we would also allocate the SearchManager
// here, but that's minor.
this->numaAccessToken = binder();
this->worker =
std::make_unique<Search::Worker>(sharedState, std::move(sm), n, this->numaAccessToken);
@ -72,27 +72,26 @@ Thread::~Thread() {
stdThread.join();
}
// Wakes up the thread that will start the search
void Thread::start_searching() {
assert(worker != nullptr);
run_custom_job([this]() { worker->start_searching(); });
}
// Wakes up the thread that will start the search
// Clears the histories for the thread worker (usually before a new game)
void Thread::clear_worker() {
assert(worker != nullptr);
run_custom_job([this]() { worker->clear(); });
}
// Blocks on the condition variable
// until the thread has finished searching.
// Blocks on the condition variable until the thread has finished searching
void Thread::wait_for_search_finished() {
std::unique_lock<std::mutex> lk(mutex);
cv.wait(lk, [&] { return !searching; });
}
// Launches a function in the thread
void Thread::run_custom_job(std::function<void()> f) {
{
std::unique_lock<std::mutex> lk(mutex);
@ -103,8 +102,8 @@ void Thread::run_custom_job(std::function<void()> f) {
cv.notify_one();
}
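A generic sketch of the whole hand-off, including the parking described in the next comment (the 'jobFunc', 'searching' and 'exit' member names are assumptions for illustration, not taken from this diff):
void Thread::run_custom_job(std::function<void()> f) {
    {
        std::unique_lock<std::mutex> lk(mutex);
        cv.wait(lk, [&] { return !searching; });  // wait until the previous job is done
        jobFunc   = std::move(f);                 // publish the new job
        searching = true;
    }
    cv.notify_one();                              // wake the thread parked in idle_loop()
}

void Thread::idle_loop() {
    while (true)
    {
        std::unique_lock<std::mutex> lk(mutex);
        searching = false;
        cv.notify_one();                              // unblock wait_for_search_finished()
        cv.wait(lk, [&] { return searching || exit; });  // park until a job arrives or we quit
        if (exit)
            return;
        std::function<void()> job = std::move(jobFunc);
        lk.unlock();
        job();                                        // run the job outside the lock
    }
}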
// Thread gets parked here, blocked on the
// condition variable, when it has no work to do.
// Thread gets parked here, blocked on the condition variable
// when the thread has no work to do.
void Thread::idle_loop() {
while (true)
@ -233,8 +232,9 @@ void ThreadPool::wait_on_thread(size_t threadId) {
size_t ThreadPool::num_threads() const { return threads.size(); }
// Wakes up main thread waiting in idle_loop() and
// returns immediately. Main thread will wake up other threads and start the search.
// Wakes up main thread waiting in idle_loop() and returns immediately.
// Main thread will wake up other threads and start the search.
void ThreadPool::start_thinking(const OptionsMap& options,
Position& pos,
StateListPtr& states,
@ -274,8 +274,8 @@ void ThreadPool::start_thinking(const OptionsMap& options,
// We use Position::set() to set root position across threads. But there are
// some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
// be deduced from a fen string, so set() clears them and they are set from
// setupStates->back() later. The rootState is per thread, earlier states are shared
// since they are read-only.
// setupStates->back() later. The rootState is per thread, earlier states are
// shared since they are read-only.
for (auto&& th : threads)
{
th->run_custom_job([&]() {
@ -335,7 +335,7 @@ Thread* ThreadPool::get_best_thread() const {
const bool newThreadInProvenLoss =
newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY;
// Note that we make sure not to pick a thread with truncated-PV for better viewer experience.
// We make sure not to pick a thread with truncated principal variation
const bool betterVotingValue =
thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
> thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);
@ -363,8 +363,8 @@ Thread* ThreadPool::get_best_thread() const {
}
// Start non-main threads
// Will be invoked by main thread after it has started searching
// Start non-main threads.
// Will be invoked by main thread after it has started searching.
void ThreadPool::start_searching() {
for (auto&& th : threads)
@ -374,7 +374,6 @@ void ThreadPool::start_searching() {
// Wait for non-main threads
void ThreadPool::wait_for_search_finished() const {
for (auto&& th : threads)

View file

@ -137,9 +137,9 @@ enum Bound {
BOUND_EXACT = BOUND_UPPER | BOUND_LOWER
};
// Value is used as an alias for int16_t, this is done to differentiate between
// a search value and any other integer value. The values used in search are always
// supposed to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range.
// Value is used as an alias for int, this is done to differentiate between a search
// value and any other integer value. The values used in search are always supposed
// to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range.
using Value = int;
constexpr Value VALUE_ZERO = 0;
@ -187,15 +187,20 @@ constexpr Value PieceValue[PIECE_NB] = {
using Depth = int;
enum : int {
// The following DEPTH_ constants are used for TT entries and QS movegen stages. In regular search,
// TT depth is literal: the search depth (effort) used to make the corresponding TT value.
// In qsearch, however, TT entries only store the current QS movegen stage (which should thus compare
// The following DEPTH_ constants are used for transposition table entries
// and quiescence search move generation stages. In regular search, the
// depth stored in the transposition table is literal: the search depth
// (effort) used to make the corresponding transposition table value. In
// quiescence search, however, the transposition table entries only store
// the current quiescence move generation stage (which should thus compare
// lower than any regular search depth).
DEPTH_QS_CHECKS = 0,
DEPTH_QS_NORMAL = -1,
// For TT entries where no searching at all was done (whether regular or qsearch) we use
// _UNSEARCHED, which should thus compare lower than any QS or regular depth. _ENTRY_OFFSET is used
// only for the TT entry occupancy check (see tt.cpp), and should thus be lower than _UNSEARCHED.
// For transposition table entries where no searching at all was done
// (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus
// compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET
// is used only for the transposition table entry occupancy check (see tt.cpp),
// and should thus be lower than DEPTH_UNSEARCHED.
DEPTH_UNSEARCHED = -2,
DEPTH_ENTRY_OFFSET = -3
};
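The ordering these comments rely on can be stated directly; a small compile-time sanity check, added here purely as an illustration:
static_assert(DEPTH_QS_CHECKS > DEPTH_QS_NORMAL);      // checks stage compares higher than the captures-only stage
static_assert(DEPTH_QS_NORMAL > DEPTH_UNSEARCHED);     // any searched entry outranks an unsearched one
static_assert(DEPTH_UNSEARCHED > DEPTH_ENTRY_OFFSET);  // occupancy marker compares lowest of all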
@ -356,9 +361,10 @@ enum MoveType {
// bit 14-15: special move flag: promotion (1), en passant (2), castling (3)
// NOTE: en passant bit is set only when a pawn can be captured
//
// Special cases are Move::none() and Move::null(). We can sneak these in because in
// any normal move destination square is always different from origin square
// while Move::none() and Move::null() have the same origin and destination square.
// Special cases are Move::none() and Move::null(). We can sneak these in because
// in any normal move the destination square and origin square are always different,
// but Move::none() and Move::null() have the same origin and destination square.
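A small unpacking sketch of this layout (illustration only, assuming <cstdint> is available; the 16-bit 'data' value and the low-bit destination/origin fields, bits 0-5 and 6-11, are assumptions consistent with the comment above):
constexpr Square to_sq_of(std::uint16_t data)        { return Square(data & 0x3F); }         // bits 0-5
constexpr Square from_sq_of(std::uint16_t data)      { return Square((data >> 6) & 0x3F); }  // bits 6-11
constexpr int    special_flag_of(std::uint16_t data) { return (data >> 14) & 3; }            // bits 14-15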
class Move {
public:
Move() = default;