mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
Clean up comments in code
- Capitalize comments - Reformat multi-lines comments to equalize the widths of the lines - Try to keep the width of comments around 85 characters - Remove periods at the end of single-line comments closes https://github.com/official-stockfish/Stockfish/pull/5469 No functional change
This commit is contained in:
parent
98a7bb4436
commit
7e72b37e4c
12 changed files with 356 additions and 311 deletions
|
@ -49,7 +49,7 @@ class Engine {
|
|||
|
||||
Engine(std::string path = "");
|
||||
|
||||
// Can't be movable due to components holding backreferences to fields
|
||||
// Cannot be movable due to components holding backreferences to fields
|
||||
Engine(const Engine&) = delete;
|
||||
Engine(Engine&&) = delete;
|
||||
Engine& operator=(const Engine&) = delete;
|
||||
|
|
|
@ -49,10 +49,12 @@
|
|||
#include <iostream> // std::cerr
|
||||
#include <ostream> // std::endl
|
||||
#include <windows.h>
|
||||
|
||||
// The needed Windows API for processor groups could be missed from old Windows
|
||||
// versions, so instead of calling them directly (forcing the linker to resolve
|
||||
// the calls at compile time), try to load them at runtime. To do this we need
|
||||
// first to define the corresponding function pointers.
|
||||
|
||||
extern "C" {
|
||||
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
|
||||
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
|
||||
|
@ -64,9 +66,10 @@ using AdjustTokenPrivileges_t =
|
|||
|
||||
namespace Stockfish {
|
||||
|
||||
// Wrapper for systems where the c++17 implementation
|
||||
// does not guarantee the availability of aligned_alloc(). Memory allocated with
|
||||
// std_aligned_alloc() must be freed with std_aligned_free().
|
||||
// Wrappers for systems where the c++17 implementation does not guarantee the
|
||||
// availability of aligned_alloc(). Memory allocated with std_aligned_alloc()
|
||||
// must be freed with std_aligned_free().
|
||||
|
||||
void* std_aligned_alloc(size_t alignment, size_t size) {
|
||||
#if defined(_ISOC11_SOURCE)
|
||||
return aligned_alloc(alignment, size);
|
||||
|
@ -96,7 +99,8 @@ void std_aligned_free(void* ptr) {
|
|||
#endif
|
||||
}
|
||||
|
||||
// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
|
||||
// aligned_large_pages_alloc() will return suitably aligned memory,
|
||||
// if possible using large pages.
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
|
@ -135,6 +139,7 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize
|
|||
return nullptr;
|
||||
|
||||
// We need SeLockMemoryPrivilege, so try to enable it for the process
|
||||
|
||||
if (!OpenProcessToken_f( // OpenProcessToken()
|
||||
GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
|
||||
return nullptr;
|
||||
|
@ -149,8 +154,10 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize
|
|||
tp.Privileges[0].Luid = luid;
|
||||
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
|
||||
|
||||
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
|
||||
// we still need to query GetLastError() to ensure that the privileges were actually obtained.
|
||||
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
|
||||
// succeeds, we still need to query GetLastError() to ensure that the privileges
|
||||
// were actually obtained.
|
||||
|
||||
if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
|
||||
&prevTpLen)
|
||||
&& GetLastError() == ERROR_SUCCESS)
|
||||
|
@ -189,9 +196,9 @@ void* aligned_large_pages_alloc(size_t allocSize) {
|
|||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
#if defined(__linux__)
|
||||
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
|
||||
constexpr size_t alignment = 2 * 1024 * 1024; // 2MB page size assumed
|
||||
#else
|
||||
constexpr size_t alignment = 4096; // assumed small page size
|
||||
constexpr size_t alignment = 4096; // small page size assumed
|
||||
#endif
|
||||
|
||||
// Round up to multiples of alignment
|
||||
|
@ -206,7 +213,8 @@ void* aligned_large_pages_alloc(size_t allocSize) {
|
|||
#endif
|
||||
|
||||
|
||||
// aligned_large_pages_free() will free the previously allocated ttmem
|
||||
// aligned_large_pages_free() will free the previously memory allocated
|
||||
// by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr.
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
|
|
27
src/memory.h
27
src/memory.h
|
@ -33,13 +33,13 @@ namespace Stockfish {
|
|||
|
||||
void* std_aligned_alloc(size_t alignment, size_t size);
|
||||
void std_aligned_free(void* ptr);
|
||||
// memory aligned by page size, min alignment: 4096 bytes
|
||||
void* aligned_large_pages_alloc(size_t size);
|
||||
// nop if mem == nullptr
|
||||
void aligned_large_pages_free(void* mem);
|
||||
|
||||
// frees memory which was placed there with placement new.
|
||||
// works for both single objects and arrays of unknown bound
|
||||
// Memory aligned by page size, min alignment: 4096 bytes
|
||||
void* aligned_large_pages_alloc(size_t size);
|
||||
void aligned_large_pages_free(void* mem);
|
||||
|
||||
// Frees memory which was placed there with placement new.
|
||||
// Works for both single objects and arrays of unknown bound.
|
||||
template<typename T, typename FREE_FUNC>
|
||||
void memory_deleter(T* ptr, FREE_FUNC free_func) {
|
||||
if (!ptr)
|
||||
|
@ -53,15 +53,15 @@ void memory_deleter(T* ptr, FREE_FUNC free_func) {
|
|||
return;
|
||||
}
|
||||
|
||||
// frees memory which was placed there with placement new.
|
||||
// works for both single objects and arrays of unknown bound
|
||||
// Frees memory which was placed there with placement new.
|
||||
// Works for both single objects and arrays of unknown bound.
|
||||
template<typename T, typename FREE_FUNC>
|
||||
void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
|
||||
if (!ptr)
|
||||
return;
|
||||
|
||||
|
||||
// Move back on the pointer to where the size is allocated.
|
||||
// Move back on the pointer to where the size is allocated
|
||||
const size_t array_offset = std::max(sizeof(size_t), alignof(T));
|
||||
char* raw_memory = reinterpret_cast<char*>(ptr) - array_offset;
|
||||
|
||||
|
@ -77,7 +77,7 @@ void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
|
|||
free_func(raw_memory);
|
||||
}
|
||||
|
||||
// Allocates memory for a single object and places it there with placement new.
|
||||
// Allocates memory for a single object and places it there with placement new
|
||||
template<typename T, typename ALLOC_FUNC, typename... Args>
|
||||
inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC alloc_func,
|
||||
Args&&... args) {
|
||||
|
@ -86,7 +86,7 @@ inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC all
|
|||
return new (raw_memory) T(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// Allocates memory for an array of unknown bound and places it there with placement new.
|
||||
// Allocates memory for an array of unknown bound and places it there with placement new
|
||||
template<typename T, typename ALLOC_FUNC>
|
||||
inline std::enable_if_t<std::is_array_v<T>, std::remove_extent_t<T>*>
|
||||
memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
|
||||
|
@ -94,7 +94,7 @@ memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
|
|||
|
||||
const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));
|
||||
|
||||
// save the array size in the memory location
|
||||
// Save the array size in the memory location
|
||||
char* raw_memory =
|
||||
reinterpret_cast<char*>(alloc_func(array_offset + num * sizeof(ElementType)));
|
||||
ASSERT_ALIGNED(raw_memory, alignof(T));
|
||||
|
@ -104,7 +104,8 @@ memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
|
|||
for (size_t i = 0; i < num; ++i)
|
||||
new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();
|
||||
|
||||
// Need to return the pointer at the start of the array so that the indexing in unique_ptr<T[]> works
|
||||
// Need to return the pointer at the start of the array so that
|
||||
// the indexing in unique_ptr<T[]> works.
|
||||
return reinterpret_cast<ElementType*>(raw_memory + array_offset);
|
||||
}
|
||||
|
||||
|
|
19
src/misc.cpp
19
src/misc.cpp
|
@ -112,14 +112,16 @@ class Logger {
|
|||
|
||||
|
||||
// Returns the full name of the current Stockfish version.
|
||||
// For local dev compiles we try to append the commit sha and commit date
|
||||
// from git if that fails only the local compilation date is set and "nogit" is specified:
|
||||
// Stockfish dev-YYYYMMDD-SHA
|
||||
// or
|
||||
// Stockfish dev-YYYYMMDD-nogit
|
||||
//
|
||||
// For local dev compiles we try to append the commit SHA and
|
||||
// commit date from git. If that fails only the local compilation
|
||||
// date is set and "nogit" is specified:
|
||||
// Stockfish dev-YYYYMMDD-SHA
|
||||
// or
|
||||
// Stockfish dev-YYYYMMDD-nogit
|
||||
//
|
||||
// For releases (non-dev builds) we only include the version number:
|
||||
// Stockfish version
|
||||
// Stockfish version
|
||||
std::string engine_info(bool to_uci) {
|
||||
std::stringstream ss;
|
||||
ss << "Stockfish " << version << std::setfill('0');
|
||||
|
@ -131,8 +133,9 @@ std::string engine_info(bool to_uci) {
|
|||
ss << stringify(GIT_DATE);
|
||||
#else
|
||||
constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
|
||||
std::string month, day, year;
|
||||
std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008"
|
||||
|
||||
std::string month, day, year;
|
||||
std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008"
|
||||
|
||||
date >> month >> day >> year;
|
||||
ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4)
|
||||
|
|
|
@ -59,8 +59,8 @@ enum Stages {
|
|||
QCHECK
|
||||
};
|
||||
|
||||
// Sort moves in descending order up to and including
|
||||
// a given limit. The order of moves smaller than the limit is left unspecified.
|
||||
// Sort moves in descending order up to and including a given limit.
|
||||
// The order of moves smaller than the limit is left unspecified.
|
||||
void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
|
||||
|
||||
for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p)
|
||||
|
@ -125,8 +125,8 @@ MovePicker::MovePicker(const Position& p,
|
|||
stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm));
|
||||
}
|
||||
|
||||
// Constructor for ProbCut: we generate captures with SEE greater
|
||||
// than or equal to the given threshold.
|
||||
// Constructor for ProbCut: we generate captures with SEE greater than or equal
|
||||
// to the given threshold.
|
||||
MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) :
|
||||
pos(p),
|
||||
captureHistory(cph),
|
||||
|
|
|
@ -354,13 +354,18 @@ class FeatureTransformer {
|
|||
|
||||
for (IndexType j = 0; j < NumOutputChunks; ++j)
|
||||
{
|
||||
// What we want to do is multiply inputs in a pairwise manner (after clipping), and then shift right by 9.
|
||||
// Instead, we shift left by 7, and use mulhi, stripping the bottom 16 bits, effectively shifting right by 16,
|
||||
// resulting in a net shift of 9 bits. We use mulhi because it maintains the sign of the multiplication (unlike mullo),
|
||||
// allowing us to make use of packus to clip 2 of the inputs, resulting in a save of 2 "vec_max_16" calls.
|
||||
// A special case is when we use NEON, where we shift left by 6 instead, because the instruction "vqdmulhq_s16"
|
||||
// also doubles the return value after the multiplication, adding an extra shift to the left by 1, so we
|
||||
// compensate by shifting less before the multiplication.
|
||||
// What we want to do is multiply inputs in a pairwise manner
|
||||
// (after clipping), and then shift right by 9. Instead, we
|
||||
// shift left by 7, and use mulhi, stripping the bottom 16 bits,
|
||||
// effectively shifting right by 16, resulting in a net shift
|
||||
// of 9 bits. We use mulhi because it maintains the sign of
|
||||
// the multiplication (unlike mullo), allowing us to make use
|
||||
// of packus to clip 2 of the inputs, resulting in a save of 2
|
||||
// "vec_max_16" calls. A special case is when we use NEON,
|
||||
// where we shift left by 6 instead, because the instruction
|
||||
// "vqdmulhq_s16" also doubles the return value after the
|
||||
// multiplication, adding an extra shift to the left by 1, so
|
||||
// we compensate by shifting less before the multiplication.
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
constexpr int shift = 7;
|
||||
|
@ -426,10 +431,10 @@ class FeatureTransformer {
|
|||
}
|
||||
|
||||
// NOTE: The parameter states_to_update is an array of position states.
|
||||
// All states must be sequential, that is states_to_update[i] must either be reachable
|
||||
// by repeatedly applying ->previous from states_to_update[i+1].
|
||||
// computed_st must be reachable by repeatedly applying ->previous on
|
||||
// states_to_update[0].
|
||||
// All states must be sequential, that is states_to_update[i] must
|
||||
// either be reachable by repeatedly applying ->previous from
|
||||
// states_to_update[i+1], and computed_st must be reachable by
|
||||
// repeatedly applying ->previous on states_to_update[0].
|
||||
template<Color Perspective, size_t N>
|
||||
void update_accumulator_incremental(const Position& pos,
|
||||
StateInfo* computed_st,
|
||||
|
@ -446,7 +451,7 @@ class FeatureTransformer {
|
|||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
// is defined in the VECTOR code below, once in each branch.
|
||||
vec_t acc[NumRegs];
|
||||
psqt_vec_t psqt[NumPsqtRegs];
|
||||
#endif
|
||||
|
@ -474,7 +479,8 @@ class FeatureTransformer {
|
|||
|
||||
StateInfo* st = computed_st;
|
||||
|
||||
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
|
||||
// Now update the accumulators listed in states_to_update[],
|
||||
// where the last element is a sentinel.
|
||||
#ifdef VECTOR
|
||||
|
||||
if (N == 1 && (removed[0].size() == 1 || removed[0].size() == 2) && added[0].size() == 1)
|
||||
|
@ -794,7 +800,7 @@ class FeatureTransformer {
|
|||
}
|
||||
|
||||
// The accumulator of the refresh entry has been updated.
|
||||
// Now copy its content to the actual accumulator we were refreshing
|
||||
// Now copy its content to the actual accumulator we were refreshing.
|
||||
|
||||
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
|
||||
sizeof(BiasType) * HalfDimensions);
|
||||
|
@ -827,7 +833,7 @@ class FeatureTransformer {
|
|||
|
||||
if ((oldest_st->*accPtr).computed[Perspective])
|
||||
{
|
||||
// Only update current position accumulator to minimize work.
|
||||
// Only update current position accumulator to minimize work
|
||||
StateInfo* states_to_update[1] = {pos.state()};
|
||||
update_accumulator_incremental<Perspective, 1>(pos, oldest_st, states_to_update);
|
||||
}
|
||||
|
@ -846,8 +852,8 @@ class FeatureTransformer {
|
|||
if (next == nullptr)
|
||||
return;
|
||||
|
||||
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
|
||||
// Currently we update 2 accumulators.
|
||||
// Now update the accumulators listed in states_to_update[], where
|
||||
// the last element is a sentinel. Currently we update two accumulators:
|
||||
// 1. for the current position
|
||||
// 2. the next accumulator after the computed one
|
||||
// The heuristic may change in the future.
|
||||
|
|
203
src/numa.h
203
src/numa.h
|
@ -37,8 +37,8 @@
|
|||
|
||||
#include "memory.h"
|
||||
|
||||
// We support linux very well, but we explicitly do NOT support Android, because there's
|
||||
// no affected systems, not worth maintaining.
|
||||
// We support linux very well, but we explicitly do NOT support Android,
|
||||
// because there is no affected systems, not worth maintaining.
|
||||
#if defined(__linux__) && !defined(__ANDROID__)
|
||||
#if !defined(_GNU_SOURCE)
|
||||
#define _GNU_SOURCE
|
||||
|
@ -81,9 +81,9 @@ using NumaIndex = size_t;
|
|||
inline CpuIndex get_hardware_concurrency() {
|
||||
CpuIndex concurrency = std::thread::hardware_concurrency();
|
||||
|
||||
// Get all processors across all processor groups on windows, since ::hardware_concurrency
|
||||
// only returns the number of processors in the first group, because only these
|
||||
// are available to std::thread.
|
||||
// Get all processors across all processor groups on windows, since
|
||||
// hardware_concurrency() only returns the number of processors in
|
||||
// the first group, because only these are available to std::thread.
|
||||
#ifdef _WIN64
|
||||
concurrency = std::max<CpuIndex>(concurrency, GetActiveProcessorCount(ALL_PROCESSOR_GROUPS));
|
||||
#endif
|
||||
|
@ -101,7 +101,7 @@ struct WindowsAffinity {
|
|||
|
||||
// We also provide diagnostic for when the affinity is set to nullopt
|
||||
// whether it was due to being indeterminate. If affinity is indeterminate
|
||||
// it's best to assume it is not set at all, so consistent with the meaning
|
||||
// it is best to assume it is not set at all, so consistent with the meaning
|
||||
// of the nullopt affinity.
|
||||
bool isNewDeterminate = true;
|
||||
bool isOldDeterminate = true;
|
||||
|
@ -119,23 +119,25 @@ struct WindowsAffinity {
|
|||
}
|
||||
|
||||
// Since Windows 11 and Windows Server 2022 thread affinities can span
|
||||
// processor groups and can be set as such by a new WinAPI function.
|
||||
// However, we may need to force using the old API if we detect
|
||||
// that the process has affinity set by the old API already and we want to override that.
|
||||
// Due to the limitations of the old API we can't detect its use reliably.
|
||||
// There will be cases where we detect not use but it has actually been used and vice versa.
|
||||
// processor groups and can be set as such by a new WinAPI function. However,
|
||||
// we may need to force using the old API if we detect that the process has
|
||||
// affinity set by the old API already and we want to override that. Due to the
|
||||
// limitations of the old API we cannot detect its use reliably. There will be
|
||||
// cases where we detect not use but it has actually been used and vice versa.
|
||||
|
||||
bool likely_used_old_api() const { return oldApi.has_value() || !isOldDeterminate; }
|
||||
};
|
||||
|
||||
inline std::pair<BOOL, std::vector<USHORT>> get_process_group_affinity() {
|
||||
|
||||
// GetProcessGroupAffinity requires the GroupArray argument to be
|
||||
// aligned to 4 bytes instead of just 2.
|
||||
static constexpr size_t GroupArrayMinimumAlignment = 4;
|
||||
static_assert(GroupArrayMinimumAlignment >= alignof(USHORT));
|
||||
|
||||
// The function should succeed the second time, but it may fail if the group
|
||||
// affinity has changed between GetProcessGroupAffinity calls.
|
||||
// In such case we consider this a hard error, as we can't work with unstable affinities
|
||||
// affinity has changed between GetProcessGroupAffinity calls. In such case
|
||||
// we consider this a hard error, as we Cannot work with unstable affinities
|
||||
// anyway.
|
||||
static constexpr int MAX_TRIES = 2;
|
||||
USHORT GroupCount = 1;
|
||||
|
@ -165,10 +167,10 @@ inline std::pair<BOOL, std::vector<USHORT>> get_process_group_affinity() {
|
|||
}
|
||||
|
||||
// On Windows there are two ways to set affinity, and therefore 2 ways to get it.
|
||||
// These are not consistent, so we have to check both.
|
||||
// In some cases it is actually not possible to determine affinity.
|
||||
// For example when two different threads have affinity on different processor groups,
|
||||
// set using SetThreadAffinityMask, we can't retrieve the actual affinities.
|
||||
// These are not consistent, so we have to check both. In some cases it is actually
|
||||
// not possible to determine affinity. For example when two different threads have
|
||||
// affinity on different processor groups, set using SetThreadAffinityMask, we cannot
|
||||
// retrieve the actual affinities.
|
||||
// From documentation on GetProcessAffinityMask:
|
||||
// > If the calling process contains threads in multiple groups,
|
||||
// > the function returns zero for both affinity masks.
|
||||
|
@ -196,8 +198,8 @@ inline WindowsAffinity get_process_affinity() {
|
|||
}
|
||||
else if (RequiredMaskCount > 0)
|
||||
{
|
||||
// If RequiredMaskCount then these affinities were never set, but it's not consistent
|
||||
// so GetProcessAffinityMask may still return some affinity.
|
||||
// If RequiredMaskCount then these affinities were never set, but it's
|
||||
// not consistent so GetProcessAffinityMask may still return some affinity.
|
||||
auto groupAffinities = std::make_unique<GROUP_AFFINITY[]>(RequiredMaskCount);
|
||||
|
||||
status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(),
|
||||
|
@ -233,7 +235,7 @@ inline WindowsAffinity get_process_affinity() {
|
|||
DWORD_PTR proc, sys;
|
||||
status = GetProcessAffinityMask(GetCurrentProcess(), &proc, &sys);
|
||||
|
||||
// If proc == 0 then we can't determine affinity because it spans processor groups.
|
||||
// If proc == 0 then we cannot determine affinity because it spans processor groups.
|
||||
// On Windows 11 and Server 2022 it will instead
|
||||
// > If, however, hHandle specifies a handle to the current process, the function
|
||||
// > always uses the calling thread's primary group (which by default is the same
|
||||
|
@ -246,10 +248,12 @@ inline WindowsAffinity get_process_affinity() {
|
|||
return affinity;
|
||||
}
|
||||
|
||||
// If SetProcessAffinityMask was never called the affinity
|
||||
// must span all processor groups, but if it was called it must only span one.
|
||||
// If SetProcessAffinityMask was never called the affinity must span
|
||||
// all processor groups, but if it was called it must only span one.
|
||||
|
||||
std::vector<USHORT> groupAffinity; // We need to capture this later and capturing
|
||||
// from structured bindings requires c++20.
|
||||
|
||||
std::tie(status, groupAffinity) = get_process_group_affinity();
|
||||
if (status == 0)
|
||||
{
|
||||
|
@ -282,11 +286,12 @@ inline WindowsAffinity get_process_affinity() {
|
|||
// If we got here it means that either SetProcessAffinityMask was never set
|
||||
// or we're on Windows 11/Server 2022.
|
||||
|
||||
// Since Windows 11 and Windows Server 2022 the behaviour of GetProcessAffinityMask changed
|
||||
// > If, however, hHandle specifies a handle to the current process, the function
|
||||
// > always uses the calling thread's primary group (which by default is the same
|
||||
// > as the process' primary group) in order to set the
|
||||
// > lpProcessAffinityMask and lpSystemAffinityMask.
|
||||
// Since Windows 11 and Windows Server 2022 the behaviour of
|
||||
// GetProcessAffinityMask changed:
|
||||
// > If, however, hHandle specifies a handle to the current process,
|
||||
// > the function always uses the calling thread's primary group
|
||||
// > (which by default is the same as the process' primary group)
|
||||
// > in order to set the lpProcessAffinityMask and lpSystemAffinityMask.
|
||||
// In which case we can actually retrieve the full affinity.
|
||||
|
||||
if (GetThreadSelectedCpuSetMasks_f != nullptr)
|
||||
|
@ -300,9 +305,11 @@ inline WindowsAffinity get_process_affinity() {
|
|||
const int numActiveProcessors =
|
||||
GetActiveProcessorCount(static_cast<WORD>(procGroupIndex));
|
||||
|
||||
// We have to schedule to 2 different processors and & the affinities we get.
|
||||
// Otherwise our processor choice could influence the resulting affinity.
|
||||
// We assume the processor IDs within the group are filled sequentially from 0.
|
||||
// We have to schedule to two different processors
|
||||
// and & the affinities we get. Otherwise our processor
|
||||
// choice could influence the resulting affinity.
|
||||
// We assume the processor IDs within the group are
|
||||
// filled sequentially from 0.
|
||||
uint64_t procCombined = std::numeric_limits<uint64_t>::max();
|
||||
uint64_t sysCombined = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
|
@ -346,8 +353,9 @@ inline WindowsAffinity get_process_affinity() {
|
|||
}
|
||||
}
|
||||
|
||||
// We have to detect the case where the affinity was not set, or is set to all processors
|
||||
// so that we correctly produce as std::nullopt result.
|
||||
// We have to detect the case where the affinity was not set,
|
||||
// or is set to all processors so that we correctly produce as
|
||||
// std::nullopt result.
|
||||
if (!isAffinityFull)
|
||||
{
|
||||
affinity.oldApi = std::move(cpus);
|
||||
|
@ -369,16 +377,16 @@ inline std::set<CpuIndex> get_process_affinity() {
|
|||
|
||||
std::set<CpuIndex> cpus;
|
||||
|
||||
// For unsupported systems, or in case of a soft error, we may assume all processors
|
||||
// are available for use.
|
||||
// For unsupported systems, or in case of a soft error, we may assume
|
||||
// all processors are available for use.
|
||||
[[maybe_unused]] auto set_to_all_cpus = [&]() {
|
||||
for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c)
|
||||
cpus.insert(c);
|
||||
};
|
||||
|
||||
// cpu_set_t by default holds 1024 entries. This may not be enough soon,
|
||||
// but there is no easy way to determine how many threads there actually is.
|
||||
// In this case we just choose a reasonable upper bound.
|
||||
// but there is no easy way to determine how many threads there actually
|
||||
// is. In this case we just choose a reasonable upper bound.
|
||||
static constexpr CpuIndex MaxNumCpus = 1024 * 64;
|
||||
|
||||
cpu_set_t* mask = CPU_ALLOC(MaxNumCpus);
|
||||
|
@ -437,19 +445,19 @@ class NumaReplicatedAccessToken {
|
|||
NumaIndex n;
|
||||
};
|
||||
|
||||
// Designed as immutable, because there is no good reason to alter an already existing config
|
||||
// in a way that doesn't require recreating it completely, and it would be complex and expensive
|
||||
// to maintain class invariants.
|
||||
// The CPU (processor) numbers always correspond to the actual numbering used by the system.
|
||||
// The NUMA node numbers MAY NOT correspond to the system's numbering of the NUMA nodes.
|
||||
// In particular, empty nodes may be removed, or the user may create custom nodes.
|
||||
// It is guaranteed that NUMA nodes are NOT empty, i.e. every node exposed by NumaConfig
|
||||
// has at least one processor assigned.
|
||||
// Designed as immutable, because there is no good reason to alter an already
|
||||
// existing config in a way that doesn't require recreating it completely, and
|
||||
// it would be complex and expensive to maintain class invariants.
|
||||
// The CPU (processor) numbers always correspond to the actual numbering used
|
||||
// by the system. The NUMA node numbers MAY NOT correspond to the system's
|
||||
// numbering of the NUMA nodes. In particular, empty nodes may be removed, or
|
||||
// the user may create custom nodes. It is guaranteed that NUMA nodes are NOT
|
||||
// empty: every node exposed by NumaConfig has at least one processor assigned.
|
||||
//
|
||||
// We use startup affinities so as not to modify its own behaviour in time.
|
||||
//
|
||||
// Until Stockfish doesn't support exceptions all places where an exception should be thrown
|
||||
// are replaced by std::exit.
|
||||
// Since Stockfish doesn't support exceptions all places where an exception
|
||||
// should be thrown are replaced by std::exit.
|
||||
class NumaConfig {
|
||||
public:
|
||||
NumaConfig() :
|
||||
|
@ -460,9 +468,9 @@ class NumaConfig {
|
|||
}
|
||||
|
||||
// This function queries the system for the mapping of processors to NUMA nodes.
|
||||
// On Linux we read from standardized kernel sysfs, with a fallback to single NUMA node.
|
||||
// On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see
|
||||
// comment for Windows implementation of get_process_affinity
|
||||
// On Linux we read from standardized kernel sysfs, with a fallback to single NUMA
|
||||
// node. On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see
|
||||
// comment for Windows implementation of get_process_affinity.
|
||||
static NumaConfig from_system([[maybe_unused]] bool respectProcessAffinity = true) {
|
||||
NumaConfig cfg = empty();
|
||||
|
||||
|
@ -479,7 +487,6 @@ class NumaConfig {
|
|||
|
||||
// On Linux things are straightforward, since there's no processor groups and
|
||||
// any thread can be scheduled on all processors.
|
||||
|
||||
// We try to gather this information from the sysfs first
|
||||
// https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node
|
||||
|
||||
|
@ -504,9 +511,9 @@ class NumaConfig {
|
|||
std::string path =
|
||||
std::string("/sys/devices/system/node/node") + std::to_string(n) + "/cpulist";
|
||||
auto cpuIdsStr = read_file_to_string(path);
|
||||
// Now, we only bail if the file does not exist. Some nodes may be empty, that's fine.
|
||||
// An empty node still has a file that appears to have some whitespace, so we need
|
||||
// to handle that.
|
||||
// Now, we only bail if the file does not exist. Some nodes may be
|
||||
// empty, that's fine. An empty node still has a file that appears
|
||||
// to have some whitespace, so we need to handle that.
|
||||
if (!cpuIdsStr.has_value())
|
||||
{
|
||||
fallback();
|
||||
|
@ -538,9 +545,10 @@ class NumaConfig {
|
|||
if (respectProcessAffinity)
|
||||
allowedCpus = STARTUP_PROCESSOR_AFFINITY.get_combined();
|
||||
|
||||
// The affinity can't be determined in all cases on Windows, but we at least guarantee
|
||||
// that the number of allowed processors is >= number of processors in the affinity mask.
|
||||
// In case the user is not satisfied they must set the processor numbers explicitly.
|
||||
// The affinity cannot be determined in all cases on Windows,
|
||||
// but we at least guarantee that the number of allowed processors
|
||||
// is >= number of processors in the affinity mask. In case the user
|
||||
// is not satisfied they must set the processor numbers explicitly.
|
||||
auto is_cpu_allowed = [&allowedCpus](CpuIndex c) {
|
||||
return !allowedCpus.has_value() || allowedCpus->count(c) == 1;
|
||||
};
|
||||
|
@ -711,21 +719,22 @@ class NumaConfig {
|
|||
}
|
||||
|
||||
bool suggests_binding_threads(CpuIndex numThreads) const {
|
||||
// If we can reasonably determine that the threads can't be contained
|
||||
// If we can reasonably determine that the threads cannot be contained
|
||||
// by the OS within the first NUMA node then we advise distributing
|
||||
// and binding threads. When the threads are not bound we can only use
|
||||
// NUMA memory replicated objects from the first node, so when the OS
|
||||
// has to schedule on other nodes we lose performance.
|
||||
// We also suggest binding if there's enough threads to distribute among nodes
|
||||
// with minimal disparity.
|
||||
// We try to ignore small nodes, in particular the empty ones.
|
||||
// has to schedule on other nodes we lose performance. We also suggest
|
||||
// binding if there's enough threads to distribute among nodes with minimal
|
||||
// disparity. We try to ignore small nodes, in particular the empty ones.
|
||||
|
||||
// If the affinity set by the user does not match the affinity given by the OS
|
||||
// then binding is necessary to ensure the threads are running on correct processors.
|
||||
// If the affinity set by the user does not match the affinity given by
|
||||
// the OS then binding is necessary to ensure the threads are running on
|
||||
// correct processors.
|
||||
if (customAffinity)
|
||||
return true;
|
||||
|
||||
// We obviously can't distribute a single thread, so a single thread should never be bound.
|
||||
// We obviously cannot distribute a single thread, so a single thread
|
||||
// should never be bound.
|
||||
if (numThreads <= 1)
|
||||
return false;
|
||||
|
||||
|
@ -754,8 +763,8 @@ class NumaConfig {
|
|||
|
||||
if (nodes.size() == 1)
|
||||
{
|
||||
// special case for when there's no NUMA nodes
|
||||
// doesn't buy us much, but let's keep the default path simple
|
||||
// Special case for when there's no NUMA nodes. This doesn't buy us
|
||||
// much, but let's keep the default path simple.
|
||||
ns.resize(numThreads, NumaIndex{0});
|
||||
}
|
||||
else
|
||||
|
@ -769,9 +778,11 @@ class NumaConfig {
|
|||
{
|
||||
float fill =
|
||||
static_cast<float>(occupation[n] + 1) / static_cast<float>(nodes[n].size());
|
||||
// NOTE: Do we want to perhaps fill the first available node up to 50% first before considering other nodes?
|
||||
// Probably not, because it would interfere with running multiple instances. We basically shouldn't
|
||||
// favor any particular node.
|
||||
// NOTE: Do we want to perhaps fill the first available node
|
||||
// up to 50% first before considering other nodes?
|
||||
// Probably not, because it would interfere with running
|
||||
// multiple instances. We basically shouldn't favor any
|
||||
// particular node.
|
||||
if (fill < bestNodeFill)
|
||||
{
|
||||
bestNode = n;
|
||||
|
@ -816,18 +827,19 @@ class NumaConfig {
|
|||
|
||||
#elif defined(_WIN64)
|
||||
|
||||
// Requires Windows 11. No good way to set thread affinity spanning processor groups before that.
|
||||
// Requires Windows 11. No good way to set thread affinity spanning
|
||||
// processor groups before that.
|
||||
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
|
||||
auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t(
|
||||
(void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks"));
|
||||
|
||||
// We ALWAYS set affinity with the new API if available,
|
||||
// because there's no downsides, and we forcibly keep it consistent
|
||||
// with the old API should we need to use it. I.e. we always keep this as a superset
|
||||
// of what we set with SetThreadGroupAffinity.
|
||||
// We ALWAYS set affinity with the new API if available, because
|
||||
// there's no downsides, and we forcibly keep it consistent with
|
||||
// the old API should we need to use it. I.e. we always keep this
|
||||
// as a superset of what we set with SetThreadGroupAffinity.
|
||||
if (SetThreadSelectedCpuSetMasks_f != nullptr)
|
||||
{
|
||||
// Only available on Windows 11 and Windows Server 2022 onwards.
|
||||
// Only available on Windows 11 and Windows Server 2022 onwards
|
||||
const USHORT numProcGroups = USHORT(
|
||||
((highestCpuIndex + 1) + WIN_PROCESSOR_GROUP_SIZE - 1) / WIN_PROCESSOR_GROUP_SIZE);
|
||||
auto groupAffinities = std::make_unique<GROUP_AFFINITY[]>(numProcGroups);
|
||||
|
@ -857,22 +869,25 @@ class NumaConfig {
|
|||
// Sometimes we need to force the old API, but do not use it unless necessary.
|
||||
if (SetThreadSelectedCpuSetMasks_f == nullptr || STARTUP_USE_OLD_AFFINITY_API)
|
||||
{
|
||||
// On earlier windows version (since windows 7) we can't run a single thread
|
||||
// On earlier windows version (since windows 7) we cannot run a single thread
|
||||
// on multiple processor groups, so we need to restrict the group.
|
||||
// We assume the group of the first processor listed for this node.
|
||||
// Processors from outside this group will not be assigned for this thread.
|
||||
// Normally this won't be an issue because windows used to assign NUMA nodes
|
||||
// such that they can't span processor groups. However, since Windows 10 Build 20348
|
||||
// the behaviour changed, so there's a small window of versions between this and Windows 11
|
||||
// that might exhibit problems with not all processors being utilized.
|
||||
// We handle this in NumaConfig::from_system by manually splitting the nodes when
|
||||
// we detect that there's no function to set affinity spanning processor nodes.
|
||||
// This is required because otherwise our thread distribution code may produce
|
||||
// suboptimal results.
|
||||
// such that they cannot span processor groups. However, since Windows 10
|
||||
// Build 20348 the behaviour changed, so there's a small window of versions
|
||||
// between this and Windows 11 that might exhibit problems with not all
|
||||
// processors being utilized.
|
||||
//
|
||||
// We handle this in NumaConfig::from_system by manually splitting the
|
||||
// nodes when we detect that there is no function to set affinity spanning
|
||||
// processor nodes. This is required because otherwise our thread distribution
|
||||
// code may produce suboptimal results.
|
||||
//
|
||||
// See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
|
||||
GROUP_AFFINITY affinity;
|
||||
std::memset(&affinity, 0, sizeof(GROUP_AFFINITY));
|
||||
// We use an ordered set so we're guaranteed to get the smallest cpu number here.
|
||||
// We use an ordered set to be sure to get the smallest cpu number here.
|
||||
const size_t forcedProcGroupIndex = *(nodes[n].begin()) / WIN_PROCESSOR_GROUP_SIZE;
|
||||
affinity.Group = static_cast<WORD>(forcedProcGroupIndex);
|
||||
for (CpuIndex c : nodes[n])
|
||||
|
@ -894,8 +909,8 @@ class NumaConfig {
|
|||
if (status == 0)
|
||||
std::exit(EXIT_FAILURE);
|
||||
|
||||
// We yield this thread just to be sure it gets rescheduled.
|
||||
// This is defensive, allowed because this code is not performance critical.
|
||||
// We yield this thread just to be sure it gets rescheduled. This is
|
||||
// defensive, allowed because this code is not performance critical.
|
||||
SwitchToThread();
|
||||
}
|
||||
|
||||
|
@ -1013,8 +1028,8 @@ class NumaConfig {
|
|||
|
||||
class NumaReplicationContext;
|
||||
|
||||
// Instances of this class are tracked by the NumaReplicationContext instance
|
||||
// NumaReplicationContext informs all tracked instances whenever NUMA configuration changes.
|
||||
// Instances of this class are tracked by the NumaReplicationContext instance.
|
||||
// NumaReplicationContext informs all tracked instances when NUMA configuration changes.
|
||||
class NumaReplicatedBase {
|
||||
public:
|
||||
NumaReplicatedBase(NumaReplicationContext& ctx);
|
||||
|
@ -1034,9 +1049,9 @@ class NumaReplicatedBase {
|
|||
NumaReplicationContext* context;
|
||||
};
|
||||
|
||||
// We force boxing with a unique_ptr. If this becomes an issue due to added indirection we
|
||||
// may need to add an option for a custom boxing type.
|
||||
// When the NUMA config changes the value stored at the index 0 is replicated to other nodes.
|
||||
// We force boxing with a unique_ptr. If this becomes an issue due to added
|
||||
// indirection we may need to add an option for a custom boxing type. When the
|
||||
// NUMA config changes the value stored at the index 0 is replicated to other nodes.
|
||||
template<typename T>
|
||||
class NumaReplicated: public NumaReplicatedBase {
|
||||
public:
|
||||
|
@ -1090,8 +1105,8 @@ class NumaReplicated: public NumaReplicatedBase {
|
|||
}
|
||||
|
||||
void on_numa_config_changed() override {
|
||||
// Use the first one as the source. It doesn't matter which one we use, because they all must
|
||||
// be identical, but the first one is guaranteed to exist.
|
||||
// Use the first one as the source. It doesn't matter which one we use,
|
||||
// because they all must be identical, but the first one is guaranteed to exist.
|
||||
auto source = std::move(instances[0]);
|
||||
replicate_from(std::move(*source));
|
||||
}
|
||||
|
@ -1167,7 +1182,7 @@ class NumaReplicationContext {
|
|||
private:
|
||||
NumaConfig config;
|
||||
|
||||
// std::set uses std::less by default, which is required for pointer comparison to be defined.
|
||||
// std::set uses std::less by default, which is required for pointer comparison
|
||||
std::set<NumaReplicatedBase*> trackedReplicatedObjects;
|
||||
};
|
||||
|
||||
|
|
|
@ -315,8 +315,8 @@ inline bool Position::capture(Move m) const {
|
|||
}
|
||||
|
||||
// Returns true if a move is generated from the capture stage, having also
|
||||
// queen promotions covered, i.e. consistency with the capture stage move generation
|
||||
// is needed to avoid the generation of duplicate moves.
|
||||
// queen promotions covered, i.e. consistency with the capture stage move
|
||||
// generation is needed to avoid the generation of duplicate moves.
|
||||
inline bool Position::capture_stage(Move m) const {
|
||||
assert(m.is_ok());
|
||||
return capture(m) || m.promotion_type() == QUEEN;
|
||||
|
|
268
src/search.cpp
268
src/search.cpp
|
@ -78,7 +78,8 @@ constexpr int futility_move_count(bool improving, Depth depth) {
|
|||
return improving ? (3 + depth * depth) : (3 + depth * depth) / 2;
|
||||
}
|
||||
|
||||
// Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range
|
||||
// Add correctionHistory value to raw staticEval and guarantee evaluation
|
||||
// does not hit the tablebase range.
|
||||
Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) {
|
||||
auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index<Correction>(pos)];
|
||||
v += cv * std::abs(cv) / 5073;
|
||||
|
@ -333,8 +334,8 @@ void Search::Worker::iterative_deepening() {
|
|||
int failedHighCnt = 0;
|
||||
while (true)
|
||||
{
|
||||
// Adjust the effective depth searched, but ensure at least one effective increment
|
||||
// for every four searchAgain steps (see issue #2717).
|
||||
// Adjust the effective depth searched, but ensure at least one
|
||||
// effective increment for every four searchAgain steps (see issue #2717).
|
||||
Depth adjustedDepth =
|
||||
std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4);
|
||||
rootDelta = beta - alpha;
|
||||
|
@ -354,15 +355,15 @@ void Search::Worker::iterative_deepening() {
|
|||
if (threads.stop)
|
||||
break;
|
||||
|
||||
// When failing high/low give some update before a re-search.
|
||||
// To avoid excessive output that could hang GUIs like Fritz 19, only start
|
||||
// When failing high/low give some update before a re-search. To avoid
|
||||
// excessive output that could hang GUIs like Fritz 19, only start
|
||||
// at nodes > 10M (rather than depth N, which can be reached quickly)
|
||||
if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta)
|
||||
&& nodes > 10000000)
|
||||
main_manager()->pv(*this, threads, tt, rootDepth);
|
||||
|
||||
// In case of failing low/high increase aspiration window and
|
||||
// re-search, otherwise exit the loop.
|
||||
// In case of failing low/high increase aspiration window and re-search,
|
||||
// otherwise exit the loop.
|
||||
if (bestValue <= alpha)
|
||||
{
|
||||
beta = (alpha + beta) / 2;
|
||||
|
@ -390,10 +391,11 @@ void Search::Worker::iterative_deepening() {
|
|||
|
||||
if (mainThread
|
||||
&& (threads.stop || pvIdx + 1 == multiPV || nodes > 10000000)
|
||||
// A thread that aborted search can have mated-in/TB-loss PV and score
|
||||
// that cannot be trusted, i.e. it can be delayed or refuted if we would have
|
||||
// had time to fully search other root-moves. Thus we suppress this output and
|
||||
// below pick a proven score/PV for this thread (from the previous iteration).
|
||||
// A thread that aborted search can have mated-in/TB-loss PV and
|
||||
// score that cannot be trusted, i.e. it can be delayed or refuted
|
||||
// if we would have had time to fully search other root-moves. Thus
|
||||
// we suppress this output and below pick a proven score/PV for this
|
||||
// thread (from the previous iteration).
|
||||
&& !(threads.abortedSearch && rootMoves[0].uciScore <= VALUE_TB_LOSS_IN_MAX_PLY))
|
||||
main_manager()->pv(*this, threads, tt, rootDepth);
|
||||
|
||||
|
@ -504,6 +506,7 @@ void Search::Worker::iterative_deepening() {
|
|||
skill.best ? skill.best : skill.pick_best(rootMoves, multiPV)));
|
||||
}
|
||||
|
||||
// Reset histories, usually before a new game
|
||||
void Search::Worker::clear() {
|
||||
mainHistory.fill(0);
|
||||
captureHistory.fill(-700);
|
||||
|
@ -523,7 +526,7 @@ void Search::Worker::clear() {
|
|||
}
|
||||
|
||||
|
||||
// Main search function for both PV and non-PV nodes.
|
||||
// Main search function for both PV and non-PV nodes
|
||||
template<NodeType nodeType>
|
||||
Value Search::Worker::search(
|
||||
Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) {
|
||||
|
@ -538,7 +541,7 @@ Value Search::Worker::search(
|
|||
// Limit the depth if extensions made it too large
|
||||
depth = std::min(depth, MAX_PLY - 1);
|
||||
|
||||
// Check if we have an upcoming move that draws by repetition.
|
||||
// Check if we have an upcoming move that draws by repetition
|
||||
if (!rootNode && alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply))
|
||||
{
|
||||
alpha = value_draw(this->nodes);
|
||||
|
@ -611,7 +614,7 @@ Value Search::Worker::search(
|
|||
Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE;
|
||||
ss->statScore = 0;
|
||||
|
||||
// Step 4. Transposition table lookup.
|
||||
// Step 4. Transposition table lookup
|
||||
excludedMove = ss->excludedMove;
|
||||
posKey = pos.key();
|
||||
auto [ttHit, ttData, ttWriter] = tt.probe(posKey);
|
||||
|
@ -676,7 +679,7 @@ Value Search::Worker::search(
|
|||
|
||||
Value tbValue = VALUE_TB - ss->ply;
|
||||
|
||||
// use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score
|
||||
// Use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score
|
||||
value = wdl < -drawScore ? -tbValue
|
||||
: wdl > drawScore ? tbValue
|
||||
: VALUE_DRAW + 2 * wdl * drawScore;
|
||||
|
@ -771,8 +774,8 @@ Value Search::Worker::search(
|
|||
opponentWorsening = ss->staticEval + (ss - 1)->staticEval > 2;
|
||||
|
||||
// Step 7. Razoring (~1 Elo)
|
||||
// If eval is really low check with qsearch if it can exceed alpha, if it can't,
|
||||
// return a fail low.
|
||||
// If eval is really low, check with qsearch if we can exceed alpha. If the
|
||||
// search suggests we cannot exceed alpha, return a speculative fail low.
|
||||
if (eval < alpha - 494 - 290 * depth * depth)
|
||||
{
|
||||
value = qsearch<NonPV>(pos, ss, alpha - 1, alpha);
|
||||
|
@ -836,27 +839,26 @@ Value Search::Worker::search(
|
|||
if (PvNode && !ttData.move)
|
||||
depth -= 3;
|
||||
|
||||
// Use qsearch if depth <= 0.
|
||||
// Use qsearch if depth <= 0
|
||||
if (depth <= 0)
|
||||
return qsearch<PV>(pos, ss, alpha, beta);
|
||||
|
||||
// For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, or
|
||||
// by 1 if there is a ttMove with an upper bound.
|
||||
// For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove,
|
||||
// or by 1 if there is a ttMove with an upper bound.
|
||||
if (cutNode && depth >= 7 && (!ttData.move || ttData.bound == BOUND_UPPER))
|
||||
depth -= 1 + !ttData.move;
|
||||
|
||||
// Step 11. ProbCut (~10 Elo)
|
||||
// If we have a good enough capture (or queen promotion) and a reduced search returns a value
|
||||
// much above beta, we can (almost) safely prune the previous move.
|
||||
// If we have a good enough capture (or queen promotion) and a reduced search
|
||||
// returns a value much above beta, we can (almost) safely prune the previous move.
|
||||
probCutBeta = beta + 184 - 53 * improving;
|
||||
if (
|
||||
!PvNode && depth > 3
|
||||
&& std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY
|
||||
// If value from transposition table is lower than probCutBeta, don't attempt probCut
|
||||
// there and in further interactions with transposition table cutoff depth is set to depth - 3
|
||||
// because probCut search has depth set to depth - 4 but we also do a move before it
|
||||
// So effective depth is equal to depth - 3
|
||||
&& !(ttData.depth >= depth - 3 && ttData.value != VALUE_NONE && ttData.value < probCutBeta))
|
||||
if (!PvNode && depth > 3
|
||||
&& std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY
|
||||
// If value from transposition table is lower than probCutBeta, don't attempt
|
||||
// probCut there and in further interactions with transposition table cutoff
|
||||
// depth is set to depth - 3 because probCut search has depth set to depth - 4
|
||||
// but we also do a move before it. So effective depth is equal to depth - 3.
|
||||
&& !(ttData.depth >= depth - 3 && ttData.value != VALUE_NONE && ttData.value < probCutBeta))
|
||||
{
|
||||
assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta);
|
||||
|
||||
|
@ -870,7 +872,6 @@ Value Search::Worker::search(
|
|||
if (move == excludedMove)
|
||||
continue;
|
||||
|
||||
// Check for legality
|
||||
if (!pos.legal(move))
|
||||
continue;
|
||||
|
||||
|
@ -1050,18 +1051,18 @@ moves_loop: // When in check, search starts here
|
|||
// We take care to not overdo to avoid search getting stuck.
|
||||
if (ss->ply < thisThread->rootDepth * 2)
|
||||
{
|
||||
// Singular extension search (~76 Elo, ~170 nElo). If all moves but one fail
|
||||
// low on a search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
|
||||
// then that move is singular and should be extended. To verify this we do
|
||||
// a reduced search on the position excluding the ttMove and if the result
|
||||
// is lower than ttValue minus a margin, then we will extend the ttMove.
|
||||
// Recursive singular search is avoided.
|
||||
// Singular extension search (~76 Elo, ~170 nElo). If all moves but one
|
||||
// fail low on a search of (alpha-s, beta-s), and just one fails high on
|
||||
// (alpha, beta), then that move is singular and should be extended. To
|
||||
// verify this we do a reduced search on the position excluding the ttMove
|
||||
// and if the result is lower than ttValue minus a margin, then we will
|
||||
// extend the ttMove. Recursive singular search is avoided.
|
||||
|
||||
// Note: the depth margin and singularBeta margin are known for having non-linear
|
||||
// scaling. Their values are optimized to time controls of 180+1.8 and longer
|
||||
// so changing them requires tests at these types of time controls.
|
||||
// Generally, higher singularBeta (i.e closer to ttValue) and lower extension
|
||||
// margins scale well.
|
||||
// Note: the depth margin and singularBeta margin are known for having
|
||||
// non-linear scaling. Their values are optimized to time controls of
|
||||
// 180+1.8 and longer so changing them requires tests at these types of
|
||||
// time controls. Generally, higher singularBeta (i.e closer to ttValue)
|
||||
// and lower extension margins scale well.
|
||||
|
||||
if (!rootNode && move == ttData.move && !excludedMove
|
||||
&& depth >= 4 - (thisThread->completedDepth > 36) + ss->ttPv
|
||||
|
@ -1089,28 +1090,31 @@ moves_loop: // When in check, search starts here
|
|||
|
||||
// Multi-cut pruning
|
||||
// Our ttMove is assumed to fail high based on the bound of the TT entry,
|
||||
// and if after excluding the ttMove with a reduced search we fail high over the original beta,
|
||||
// we assume this expected cut-node is not singular (multiple moves fail high),
|
||||
// and we can prune the whole subtree by returning a softbound.
|
||||
// and if after excluding the ttMove with a reduced search we fail high
|
||||
// over the original beta, we assume this expected cut-node is not
|
||||
// singular (multiple moves fail high), and we can prune the whole
|
||||
// subtree by returning a softbound.
|
||||
else if (value >= beta && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY)
|
||||
return value;
|
||||
|
||||
// Negative extensions
|
||||
// If other moves failed high over (ttValue - margin) without the ttMove on a reduced search,
|
||||
// but we cannot do multi-cut because (ttValue - margin) is lower than the original beta,
|
||||
// we do not know if the ttMove is singular or can do a multi-cut,
|
||||
// so we reduce the ttMove in favor of other moves based on some conditions:
|
||||
// If other moves failed high over (ttValue - margin) without the
|
||||
// ttMove on a reduced search, but we cannot do multi-cut because
|
||||
// (ttValue - margin) is lower than the original beta, we do not know
|
||||
// if the ttMove is singular or can do a multi-cut, so we reduce the
|
||||
// ttMove in favor of other moves based on some conditions:
|
||||
|
||||
// If the ttMove is assumed to fail high over current beta (~7 Elo)
|
||||
else if (ttData.value >= beta)
|
||||
extension = -3;
|
||||
|
||||
// If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo)
|
||||
// If we are on a cutNode but the ttMove is not assumed to fail high
|
||||
// over current beta (~1 Elo)
|
||||
else if (cutNode)
|
||||
extension = -2;
|
||||
}
|
||||
|
||||
// Extension for capturing the previous moved piece (~0 Elo on STC, ~1 Elo on LTC)
|
||||
// Extension for capturing the previous moved piece (~1 Elo at LTC)
|
||||
else if (PvNode && move.to_sq() == prevSq
|
||||
&& thisThread->captureHistory[movedPiece][move.to_sq()]
|
||||
[type_of(pos.piece_on(move.to_sq()))]
|
||||
|
@ -1136,9 +1140,9 @@ moves_loop: // When in check, search starts here
|
|||
pos.do_move(move, st, givesCheck);
|
||||
|
||||
// These reduction adjustments have proven non-linear scaling.
|
||||
// They are optimized to time controls of 180 + 1.8 and longer so
|
||||
// changing them or adding conditions that are similar
|
||||
// requires tests at these types of time controls.
|
||||
// They are optimized to time controls of 180 + 1.8 and longer,
|
||||
// so changing them or adding conditions that are similar requires
|
||||
// tests at these types of time controls.
|
||||
|
||||
// Decrease reduction if position is or has been on the PV (~7 Elo)
|
||||
if (ss->ttPv)
|
||||
|
@ -1148,7 +1152,7 @@ moves_loop: // When in check, search starts here
|
|||
if (PvNode)
|
||||
r--;
|
||||
|
||||
// These reduction adjustments have no proven non-linear scaling.
|
||||
// These reduction adjustments have no proven non-linear scaling
|
||||
|
||||
// Increase reduction for cut nodes (~4 Elo)
|
||||
if (cutNode)
|
||||
|
@ -1163,8 +1167,8 @@ moves_loop: // When in check, search starts here
|
|||
if ((ss + 1)->cutoffCnt > 3)
|
||||
r += 1 + !(PvNode || cutNode);
|
||||
|
||||
// For first picked move (ttMove) reduce reduction
|
||||
// but never allow it to go below 0 (~3 Elo)
|
||||
// For first picked move (ttMove) reduce reduction, but never allow
|
||||
// reduction to go below 0 (~3 Elo)
|
||||
else if (move == ttData.move)
|
||||
r = std::max(0, r - 2);
|
||||
|
||||
|
@ -1190,8 +1194,8 @@ moves_loop: // When in check, search starts here
|
|||
// Do a full-depth search when reduced LMR search fails high
|
||||
if (value > alpha && d < newDepth)
|
||||
{
|
||||
// Adjust full-depth search based on LMR results - if the result
|
||||
// was good enough search deeper, if it was bad enough search shallower.
|
||||
// Adjust full-depth search based on LMR results - if the result was
|
||||
// good enough search deeper, if it was bad enough search shallower.
|
||||
const bool doDeeperSearch = value > (bestValue + 35 + 2 * newDepth); // (~1 Elo)
|
||||
const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo)
|
||||
|
||||
|
@ -1237,8 +1241,8 @@ moves_loop: // When in check, search starts here
|
|||
|
||||
// Step 20. Check for a new best move
|
||||
// Finished searching the move. If a stop occurred, the return value of
|
||||
// the search cannot be trusted, and we return immediately without
|
||||
// updating best move, PV and TT.
|
||||
// the search cannot be trusted, and we return immediately without updating
|
||||
// best move, principal variation nor transposition table.
|
||||
if (threads.stop.load(std::memory_order_relaxed))
|
||||
return VALUE_ZERO;
|
||||
|
||||
|
@ -1351,7 +1355,8 @@ moves_loop: // When in check, search starts here
|
|||
if (!moveCount)
|
||||
bestValue = excludedMove ? alpha : ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW;
|
||||
|
||||
// If there is a move that produces search value greater than alpha we update the stats of searched moves
|
||||
// If there is a move that produces search value greater than alpha,
|
||||
// we update the stats of searched moves.
|
||||
else if (bestMove)
|
||||
update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, quietCount,
|
||||
capturesSearched, captureCount, depth);
|
||||
|
@ -1385,8 +1390,8 @@ moves_loop: // When in check, search starts here
|
|||
if (bestValue <= alpha)
|
||||
ss->ttPv = ss->ttPv || ((ss - 1)->ttPv && depth > 3);
|
||||
|
||||
// Write gathered information in transposition table
|
||||
// Static evaluation is saved as it was before correction history
|
||||
// Write gathered information in transposition table. Note that the
|
||||
// static evaluation is saved as it was before correction history.
|
||||
if (!excludedMove && !(rootNode && thisThread->pvIdx))
|
||||
ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv,
|
||||
bestValue >= beta ? BOUND_LOWER
|
||||
|
@ -1410,12 +1415,12 @@ moves_loop: // When in check, search starts here
|
|||
}
|
||||
|
||||
|
||||
// Quiescence search function, which is called by the main search function with zero depth, or
|
||||
// recursively with further decreasing depth per call. With depth <= 0, we "should" be using
|
||||
// static eval only, but tactical moves may confuse the static eval. To fight this horizon effect,
|
||||
// we implement this qsearch of tactical moves only.
|
||||
// See https://www.chessprogramming.org/Horizon_Effect and https://www.chessprogramming.org/Quiescence_Search
|
||||
// (~155 Elo)
|
||||
// Quiescence search function, which is called by the main search function with
|
||||
// depth zero, or recursively with further decreasing depth. With depth <= 0, we
|
||||
// "should" be using static eval only, but tactical moves may confuse the static eval.
|
||||
// To fight this horizon effect, we implement this qsearch of tactical moves (~155 Elo).
|
||||
// See https://www.chessprogramming.org/Horizon_Effect
|
||||
// and https://www.chessprogramming.org/Quiescence_Search
|
||||
template<NodeType nodeType>
|
||||
Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) {
|
||||
|
||||
|
@ -1426,7 +1431,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
assert(PvNode || (alpha == beta - 1));
|
||||
assert(depth <= 0);
|
||||
|
||||
// Check if we have an upcoming move that draws by repetition. (~1 Elo)
|
||||
// Check if we have an upcoming move that draws by repetition (~1 Elo)
|
||||
if (alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply))
|
||||
{
|
||||
alpha = value_draw(this->nodes);
|
||||
|
@ -1469,9 +1474,10 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
|
||||
assert(0 <= ss->ply && ss->ply < MAX_PLY);
|
||||
|
||||
// Note that unlike regular search, which stores the literal depth into the TT, from QS we
|
||||
// only store the current movegen stage as "depth". If in check, we search all evasions and
|
||||
// thus store DEPTH_QS_CHECKS. (Evasions may be quiet, and _CHECKS includes quiets.)
|
||||
// Note that unlike regular search, which stores the literal depth into the
|
||||
// transposition table, from qsearch we only store the current movegen stage
|
||||
// as "depth". If in check, we search all evasions and thus store DEPTH_QS_CHECKS.
|
||||
// Evasions may be quiet, and _CHECKS includes quiets.
|
||||
Depth qsTtDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NORMAL;
|
||||
|
||||
// Step 3. Transposition table lookup
|
||||
|
@ -1512,7 +1518,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
}
|
||||
else
|
||||
{
|
||||
// In case of null move search, use previous static eval with a different sign
|
||||
// In case of null move search, use previous static eval with opposite sign
|
||||
unadjustedStaticEval =
|
||||
(ss - 1)->currentMove != Move::null()
|
||||
? evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us])
|
||||
|
@ -1542,21 +1548,20 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory,
|
||||
(ss - 2)->continuationHistory};
|
||||
|
||||
// Initialize a MovePicker object for the current position, and prepare to search the moves.
|
||||
// We presently use two stages of qs movegen, first captures+checks, then captures only.
|
||||
// (When in check, we simply search all evasions.)
|
||||
// (Presently, having the checks stage is worth only 1 Elo, and may be removable in the near future,
|
||||
// which would result in only a single stage of QS movegen.)
|
||||
// Initialize a MovePicker object for the current position, and prepare to search
|
||||
// the moves. We presently use two stages of move generator in quiescence search:
|
||||
// first captures+checks, then captures only (but when in check, we simply search
|
||||
// all evasions).
|
||||
Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE;
|
||||
MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory,
|
||||
contHist, &thisThread->pawnHistory);
|
||||
|
||||
// Step 5. Loop through all pseudo-legal moves until no moves remain or a beta cutoff occurs.
|
||||
// Step 5. Loop through all pseudo-legal moves until no moves remain or a beta
|
||||
// cutoff occurs.
|
||||
while ((move = mp.next_move()) != Move::none())
|
||||
{
|
||||
assert(move.is_ok());
|
||||
|
||||
// Check for legality
|
||||
if (!pos.legal(move))
|
||||
continue;
|
||||
|
||||
|
@ -1577,24 +1582,24 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
|
||||
Value futilityValue = futilityBase + PieceValue[pos.piece_on(move.to_sq())];
|
||||
|
||||
// If static eval + value of piece we are going to capture is much lower
|
||||
// than alpha we can prune this move. (~2 Elo)
|
||||
// If static eval + value of piece we are going to capture is
|
||||
// much lower than alpha, we can prune this move. (~2 Elo)
|
||||
if (futilityValue <= alpha)
|
||||
{
|
||||
bestValue = std::max(bestValue, futilityValue);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If static eval is much lower than alpha and move is not winning material
|
||||
// we can prune this move. (~2 Elo)
|
||||
// If static eval is much lower than alpha and move is
|
||||
// not winning material, we can prune this move. (~2 Elo)
|
||||
if (futilityBase <= alpha && !pos.see_ge(move, 1))
|
||||
{
|
||||
bestValue = std::max(bestValue, futilityBase);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If static exchange evaluation is much worse than what is needed to not
|
||||
// fall below alpha we can prune this move.
|
||||
// If static exchange evaluation is much worse than what
|
||||
// is needed to not fall below alpha, we can prune this move.
|
||||
if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 4))
|
||||
{
|
||||
bestValue = alpha;
|
||||
|
@ -1654,8 +1659,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
}
|
||||
|
||||
// Step 9. Check for mate
|
||||
// All legal moves have been searched. A special case: if we're in check
|
||||
// and no legal moves were found, it is checkmate.
|
||||
// All legal moves have been searched. A special case: if we are
|
||||
// in check and no legal moves were found, it is checkmate.
|
||||
if (ss->inCheck && bestValue == -VALUE_INFINITE)
|
||||
{
|
||||
assert(!MoveList<LEGAL>(pos).size());
|
||||
|
@ -1665,8 +1670,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && bestValue >= beta)
|
||||
bestValue = (3 * bestValue + beta) / 4;
|
||||
|
||||
// Save gathered info in transposition table
|
||||
// Static evaluation is saved as it was before adjustment by correction history
|
||||
// Save gathered info in transposition table. The static evaluation
|
||||
// is saved as it was before adjustment by correction history.
|
||||
ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), pvHit,
|
||||
bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, qsTtDepth, bestMove,
|
||||
unadjustedStaticEval, tt.generation());
|
||||
|
@ -1697,8 +1702,8 @@ TimePoint Search::Worker::elapsed_time() const { return main_manager()->tm.elaps
|
|||
|
||||
|
||||
namespace {
|
||||
// Adjusts a mate or TB score from "plies to mate from the root"
|
||||
// to "plies to mate from the current position". Standard scores are unchanged.
|
||||
// Adjusts a mate or TB score from "plies to mate from the root" to
|
||||
// "plies to mate from the current position". Standard scores are unchanged.
|
||||
// The function is called before storing a value in the transposition table.
|
||||
Value value_to_tt(Value v, int ply) {
|
||||
|
||||
|
@ -1707,11 +1712,11 @@ Value value_to_tt(Value v, int ply) {
|
|||
}
|
||||
|
||||
|
||||
// Inverse of value_to_tt(): it adjusts a mate or TB score
|
||||
// from the transposition table (which refers to the plies to mate/be mated from
|
||||
// current position) to "plies to mate/be mated (TB win/loss) from the root".
|
||||
// However, to avoid potentially false mate or TB scores related to the 50 moves rule
|
||||
// and the graph history interaction, we return the highest non-TB score instead.
|
||||
// Inverse of value_to_tt(): it adjusts a mate or TB score from the transposition
|
||||
// table (which refers to the plies to mate/be mated from current position) to
|
||||
// "plies to mate/be mated (TB win/loss) from the root". However, to avoid
|
||||
// potentially false mate or TB scores related to the 50 moves rule and the
|
||||
// graph history interaction, we return the highest non-TB score instead.
|
||||
Value value_from_tt(Value v, int ply, int r50c) {
|
||||
|
||||
if (v == VALUE_NONE)
|
||||
|
@ -1810,8 +1815,8 @@ void update_all_stats(const Position& pos,
|
|||
}
|
||||
|
||||
|
||||
// Updates histories of the move pairs formed
|
||||
// by moves at ply -1, -2, -3, -4, and -6 with current move.
|
||||
// Updates histories of the move pairs formed by moves
|
||||
// at ply -1, -2, -3, -4, and -6 with current move.
|
||||
void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) {
|
||||
|
||||
bonus = bonus * 52 / 64;
|
||||
|
@ -1859,8 +1864,8 @@ void update_quiet_stats(
|
|||
|
||||
}
|
||||
|
||||
// When playing with strength handicap, choose the best move among a set of RootMoves
|
||||
// using a statistical rule dependent on 'level'. Idea by Heinz van Saanen.
|
||||
// When playing with strength handicap, choose the best move among a set of
|
||||
// RootMoves using a statistical rule dependent on 'level'. Idea by Heinz van Saanen.
|
||||
Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) {
|
||||
static PRNG rng(now()); // PRNG sequence should be non-deterministic
|
||||
|
||||
|
@ -1891,8 +1896,8 @@ Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) {
|
|||
}
|
||||
|
||||
|
||||
// Used to print debug info and, more importantly,
|
||||
// to detect when we are out of available time and thus stop the search.
|
||||
// Used to print debug info and, more importantly, to detect
|
||||
// when we are out of available time and thus stop the search.
|
||||
void SearchManager::check_time(Search::Worker& worker) {
|
||||
if (--callsCnt > 0)
|
||||
return;
|
||||
|
@ -1926,8 +1931,9 @@ void SearchManager::check_time(Search::Worker& worker) {
|
|||
}
|
||||
|
||||
// Used to correct and extend PVs for moves that have a TB (but not a mate) score.
|
||||
// Keeps the search based PV for as long as it is verified to maintain the game outcome, truncates afterwards.
|
||||
// Finally, extends to mate the PV, providing a possible continuation (but not a proven mating line).
|
||||
// Keeps the search based PV for as long as it is verified to maintain the game
|
||||
// outcome, truncates afterwards. Finally, extends to mate the PV, providing a
|
||||
// possible continuation (but not a proven mating line).
|
||||
void syzygy_extend_pv(const OptionsMap& options,
|
||||
const Search::LimitsType& limits,
|
||||
Position& pos,
|
||||
|
@ -1937,7 +1943,7 @@ void syzygy_extend_pv(const OptionsMap& options,
|
|||
auto t_start = std::chrono::steady_clock::now();
|
||||
int moveOverhead = int(options["Move Overhead"]);
|
||||
|
||||
// Do not use more than moveOverhead / 2 time, if time management is active.
|
||||
// Do not use more than moveOverhead / 2 time, if time management is active
|
||||
auto time_abort = [&t_start, &moveOverhead, &limits]() -> bool {
|
||||
auto t_end = std::chrono::steady_clock::now();
|
||||
return limits.use_time_management()
|
||||
|
@ -1968,7 +1974,7 @@ void syzygy_extend_pv(const OptionsMap& options,
|
|||
auto& st = sts.emplace_back();
|
||||
pos.do_move(pvMove, st);
|
||||
|
||||
// don't allow for repetitions or drawing moves along the PV in TB regime.
|
||||
// Do not allow for repetitions or drawing moves along the PV in TB regime
|
||||
if (config.rootInTB && pos.is_draw(ply))
|
||||
{
|
||||
pos.undo_move(pvMove);
|
||||
|
@ -1976,17 +1982,18 @@ void syzygy_extend_pv(const OptionsMap& options,
|
|||
break;
|
||||
}
|
||||
|
||||
// Full PV shown will thus be validated and end TB.
|
||||
// If we can't validate the full PV in time, we don't show it.
|
||||
// Full PV shown will thus be validated and end in TB.
|
||||
// If we cannot validate the full PV in time, we do not show it.
|
||||
if (config.rootInTB && time_abort())
|
||||
break;
|
||||
}
|
||||
|
||||
// resize the PV to the correct part
|
||||
// Resize the PV to the correct part
|
||||
rootMove.pv.resize(ply);
|
||||
|
||||
// Step 2, now extend the PV to mate, as if the user explores syzygy-tables.info using
|
||||
// top ranked moves (minimal DTZ), which gives optimal mates only for simple endgames e.g. KRvK
|
||||
// Step 2, now extend the PV to mate, as if the user explored syzygy-tables.info
|
||||
// using top ranked moves (minimal DTZ), which gives optimal mates only for simple
|
||||
// endgames e.g. KRvK.
|
||||
while (!pos.is_draw(0))
|
||||
{
|
||||
if (time_abort())
|
||||
|
@ -1998,8 +2005,8 @@ void syzygy_extend_pv(const OptionsMap& options,
|
|||
auto& rm = legalMoves.emplace_back(m);
|
||||
StateInfo tmpSI;
|
||||
pos.do_move(m, tmpSI);
|
||||
// Give a score of each move to break DTZ ties
|
||||
// restricting opponent mobility, but not giving the opponent a capture.
|
||||
// Give a score of each move to break DTZ ties restricting opponent mobility,
|
||||
// but not giving the opponent a capture.
|
||||
for (const auto& mOpp : MoveList<LEGAL>(pos))
|
||||
rm.tbRank -= pos.capture(mOpp) ? 100 : 1;
|
||||
pos.undo_move(m);
|
||||
|
@ -2009,16 +2016,16 @@ void syzygy_extend_pv(const OptionsMap& options,
|
|||
if (legalMoves.size() == 0)
|
||||
break;
|
||||
|
||||
// sort moves according to their above assigned rank,
|
||||
// Sort moves according to their above assigned rank.
|
||||
// This will break ties for moves with equal DTZ in rank_root_moves.
|
||||
std::stable_sort(
|
||||
legalMoves.begin(), legalMoves.end(),
|
||||
[](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; });
|
||||
|
||||
// The winning side tries to minimize DTZ, the losing side maximizes it.
|
||||
// The winning side tries to minimize DTZ, the losing side maximizes it
|
||||
Tablebases::Config config = Tablebases::rank_root_moves(options, pos, legalMoves, true);
|
||||
|
||||
// If DTZ is not available we might not find a mate, so we bail out.
|
||||
// If DTZ is not available we might not find a mate, so we bail out
|
||||
if (!config.rootInTB || config.cardinality > 0)
|
||||
break;
|
||||
|
||||
|
@ -2030,23 +2037,24 @@ void syzygy_extend_pv(const OptionsMap& options,
|
|||
pos.do_move(pvMove, st);
|
||||
}
|
||||
|
||||
// Finding a draw in this function is an exceptional case, that cannot happen during engine game play,
|
||||
// since we have a winning score, and play correctly with TB support.
|
||||
// However, it can be that a position is draw due to the 50 move rule if it has been been reached
|
||||
// on the board with a non-optimal 50 move counter e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106
|
||||
// which TB with dtz counter rounding cannot always correctly rank. See also
|
||||
// Finding a draw in this function is an exceptional case, that cannot happen
|
||||
// during engine game play, since we have a winning score, and play correctly
|
||||
// with TB support. However, it can be that a position is draw due to the 50 move
|
||||
// rule if it has been been reached on the board with a non-optimal 50 move counter
|
||||
// (e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106 ) which TB with dtz counter rounding
|
||||
// cannot always correctly rank. See also
|
||||
// https://github.com/official-stockfish/Stockfish/issues/5175#issuecomment-2058893495
|
||||
// We adjust the score to match the found PV. Note that a TB loss score can be displayed
|
||||
// if the engine did not find a drawing move yet, but eventually search will figure it out.
|
||||
// E.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1
|
||||
// We adjust the score to match the found PV. Note that a TB loss score can be
|
||||
// displayed if the engine did not find a drawing move yet, but eventually search
|
||||
// will figure it out (e.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1 )
|
||||
if (pos.is_draw(0))
|
||||
v = VALUE_DRAW;
|
||||
|
||||
// Undo the PV moves.
|
||||
// Undo the PV moves
|
||||
for (auto it = rootMove.pv.rbegin(); it != rootMove.pv.rend(); ++it)
|
||||
pos.undo_move(*it);
|
||||
|
||||
// Inform if we couldn't get a full extension in time.
|
||||
// Inform if we couldn't get a full extension in time
|
||||
if (time_abort())
|
||||
sync_cout
|
||||
<< "info string Syzygy based PV extension requires more time, increase Move Overhead as needed."
|
||||
|
@ -2092,7 +2100,7 @@ void SearchManager::pv(Search::Worker& worker,
|
|||
for (Move m : rootMoves[i].pv)
|
||||
pv += UCIEngine::move(m, pos.is_chess960()) + " ";
|
||||
|
||||
// remove last whitespace
|
||||
// Remove last whitespace
|
||||
if (!pv.empty())
|
||||
pv.pop_back();
|
||||
|
||||
|
|
|
@ -236,8 +236,8 @@ class Worker {
|
|||
public:
|
||||
Worker(SharedState&, std::unique_ptr<ISearchManager>, size_t, NumaReplicatedAccessToken);
|
||||
|
||||
// Called at instantiation to initialize Reductions tables
|
||||
// Reset histories, usually before a new game
|
||||
// Called at instantiation to initialize reductions tables.
|
||||
// Reset histories, usually before a new game.
|
||||
void clear();
|
||||
|
||||
// Called when the program receives the UCI 'go' command.
|
||||
|
@ -256,7 +256,7 @@ class Worker {
|
|||
private:
|
||||
void iterative_deepening();
|
||||
|
||||
// Main search function for both PV and non-PV nodes
|
||||
// This is the main search function, for both PV and non-PV nodes
|
||||
template<NodeType nodeType>
|
||||
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
|
||||
|
||||
|
@ -266,8 +266,7 @@ class Worker {
|
|||
|
||||
Depth reduction(bool i, Depth d, int mn, int delta) const;
|
||||
|
||||
// Get a pointer to the search manager, only allowed to be called by the
|
||||
// main thread.
|
||||
// Pointer to the search manager, only allowed to be called by the main thread
|
||||
SearchManager* main_manager() const {
|
||||
assert(threadIdx == 0);
|
||||
return static_cast<SearchManager*>(manager.get());
|
||||
|
|
|
@ -50,8 +50,8 @@ Thread::Thread(Search::SharedState& sharedState,
|
|||
|
||||
run_custom_job([this, &binder, &sharedState, &sm, n]() {
|
||||
// Use the binder to [maybe] bind the threads to a NUMA node before doing
|
||||
// the Worker allocation.
|
||||
// Ideally we would also allocate the SearchManager here, but that's minor.
|
||||
// the Worker allocation. Ideally we would also allocate the SearchManager
|
||||
// here, but that's minor.
|
||||
this->numaAccessToken = binder();
|
||||
this->worker =
|
||||
std::make_unique<Search::Worker>(sharedState, std::move(sm), n, this->numaAccessToken);
|
||||
|
@ -72,27 +72,26 @@ Thread::~Thread() {
|
|||
stdThread.join();
|
||||
}
|
||||
|
||||
|
||||
// Wakes up the thread that will start the search
|
||||
void Thread::start_searching() {
|
||||
assert(worker != nullptr);
|
||||
run_custom_job([this]() { worker->start_searching(); });
|
||||
}
|
||||
|
||||
// Wakes up the thread that will start the search
|
||||
// Clears the histories for the thread worker (usually before a new game)
|
||||
void Thread::clear_worker() {
|
||||
assert(worker != nullptr);
|
||||
run_custom_job([this]() { worker->clear(); });
|
||||
}
|
||||
|
||||
// Blocks on the condition variable
|
||||
// until the thread has finished searching.
|
||||
// Blocks on the condition variable until the thread has finished searching
|
||||
void Thread::wait_for_search_finished() {
|
||||
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
cv.wait(lk, [&] { return !searching; });
|
||||
}
|
||||
|
||||
// Launching a function in the thread
|
||||
void Thread::run_custom_job(std::function<void()> f) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
|
@ -103,8 +102,8 @@ void Thread::run_custom_job(std::function<void()> f) {
|
|||
cv.notify_one();
|
||||
}
|
||||
|
||||
// Thread gets parked here, blocked on the
|
||||
// condition variable, when it has no work to do.
|
||||
// Thread gets parked here, blocked on the condition variable
|
||||
// when the thread has no work to do.
|
||||
|
||||
void Thread::idle_loop() {
|
||||
while (true)
|
||||
|
@ -233,8 +232,9 @@ void ThreadPool::wait_on_thread(size_t threadId) {
|
|||
|
||||
size_t ThreadPool::num_threads() const { return threads.size(); }
|
||||
|
||||
// Wakes up main thread waiting in idle_loop() and
|
||||
// returns immediately. Main thread will wake up other threads and start the search.
|
||||
|
||||
// Wakes up main thread waiting in idle_loop() and returns immediately.
|
||||
// Main thread will wake up other threads and start the search.
|
||||
void ThreadPool::start_thinking(const OptionsMap& options,
|
||||
Position& pos,
|
||||
StateListPtr& states,
|
||||
|
@ -274,8 +274,8 @@ void ThreadPool::start_thinking(const OptionsMap& options,
|
|||
// We use Position::set() to set root position across threads. But there are
|
||||
// some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
|
||||
// be deduced from a fen string, so set() clears them and they are set from
|
||||
// setupStates->back() later. The rootState is per thread, earlier states are shared
|
||||
// since they are read-only.
|
||||
// setupStates->back() later. The rootState is per thread, earlier states are
|
||||
// shared since they are read-only.
|
||||
for (auto&& th : threads)
|
||||
{
|
||||
th->run_custom_job([&]() {
|
||||
|
@ -335,7 +335,7 @@ Thread* ThreadPool::get_best_thread() const {
|
|||
const bool newThreadInProvenLoss =
|
||||
newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY;
|
||||
|
||||
// Note that we make sure not to pick a thread with truncated-PV for better viewer experience.
|
||||
// We make sure not to pick a thread with truncated principal variation
|
||||
const bool betterVotingValue =
|
||||
thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
|
||||
> thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);
|
||||
|
@ -363,8 +363,8 @@ Thread* ThreadPool::get_best_thread() const {
|
|||
}
|
||||
|
||||
|
||||
// Start non-main threads
|
||||
// Will be invoked by main thread after it has started searching
|
||||
// Start non-main threads.
|
||||
// Will be invoked by main thread after it has started searching.
|
||||
void ThreadPool::start_searching() {
|
||||
|
||||
for (auto&& th : threads)
|
||||
|
@ -374,7 +374,6 @@ void ThreadPool::start_searching() {
|
|||
|
||||
|
||||
// Wait for non-main threads
|
||||
|
||||
void ThreadPool::wait_for_search_finished() const {
|
||||
|
||||
for (auto&& th : threads)
|
||||
|
|
30
src/types.h
30
src/types.h
|
@ -137,9 +137,9 @@ enum Bound {
|
|||
BOUND_EXACT = BOUND_UPPER | BOUND_LOWER
|
||||
};
|
||||
|
||||
// Value is used as an alias for int16_t, this is done to differentiate between
|
||||
// a search value and any other integer value. The values used in search are always
|
||||
// supposed to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range.
|
||||
// Value is used as an alias for int, this is done to differentiate between a search
|
||||
// value and any other integer value. The values used in search are always supposed
|
||||
// to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range.
|
||||
using Value = int;
|
||||
|
||||
constexpr Value VALUE_ZERO = 0;
|
||||
|
@ -187,15 +187,20 @@ constexpr Value PieceValue[PIECE_NB] = {
|
|||
using Depth = int;
|
||||
|
||||
enum : int {
|
||||
// The following DEPTH_ constants are used for TT entries and QS movegen stages. In regular search,
|
||||
// TT depth is literal: the search depth (effort) used to make the corresponding TT value.
|
||||
// In qsearch, however, TT entries only store the current QS movegen stage (which should thus compare
|
||||
// The following DEPTH_ constants are used for transposition table entries
|
||||
// and quiescence search move generation stages. In regular search, the
|
||||
// depth stored in the transposition table is literal: the search depth
|
||||
// (effort) used to make the corresponding transposition table value. In
|
||||
// quiescence search, however, the transposition table entries only store
|
||||
// the current quiescence move generation stage (which should thus compare
|
||||
// lower than any regular search depth).
|
||||
DEPTH_QS_CHECKS = 0,
|
||||
DEPTH_QS_NORMAL = -1,
|
||||
// For TT entries where no searching at all was done (whether regular or qsearch) we use
|
||||
// _UNSEARCHED, which should thus compare lower than any QS or regular depth. _ENTRY_OFFSET is used
|
||||
// only for the TT entry occupancy check (see tt.cpp), and should thus be lower than _UNSEARCHED.
|
||||
// For transposition table entries where no searching at all was done
|
||||
// (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus
|
||||
// compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET
|
||||
// is used only for the transposition table entry occupancy check (see tt.cpp),
|
||||
// and should thus be lower than DEPTH_UNSEARCHED.
|
||||
DEPTH_UNSEARCHED = -2,
|
||||
DEPTH_ENTRY_OFFSET = -3
|
||||
};
|
||||
|
@ -356,9 +361,10 @@ enum MoveType {
|
|||
// bit 14-15: special move flag: promotion (1), en passant (2), castling (3)
|
||||
// NOTE: en passant bit is set only when a pawn can be captured
|
||||
//
|
||||
// Special cases are Move::none() and Move::null(). We can sneak these in because in
|
||||
// any normal move destination square is always different from origin square
|
||||
// while Move::none() and Move::null() have the same origin and destination square.
|
||||
// Special cases are Move::none() and Move::null(). We can sneak these in because
|
||||
// in any normal move the destination square and origin square are always different,
|
||||
// but Move::none() and Move::null() have the same origin and destination square.
|
||||
|
||||
class Move {
|
||||
public:
|
||||
Move() = default;
|
||||
|
|
Loading…
Add table
Reference in a new issue