mirror of
https://github.com/sockspls/badfish
synced 2025-04-30 08:43:09 +00:00

On a system with multiple NUMA nodes, this patch avoids unneeded replicated (e.g. 8x for a single threaded run), reducting memory use in that case. Lazy initialization forced before search. Passed STC: https://tests.stockfishchess.org/tests/view/66a28c524ff211be9d4ecdd4 LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 691776 W: 179429 L: 179927 D: 332420 Ptnml(0-2): 2573, 79370, 182547, 78778, 2620 closes https://github.com/official-stockfish/Stockfish/pull/5515 No functional change
410 lines
14 KiB
C++
410 lines
14 KiB
C++
/*
|
|
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
|
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
|
|
|
|
Stockfish is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Stockfish is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "thread.h"
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <deque>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <utility>
|
|
|
|
#include "movegen.h"
|
|
#include "search.h"
|
|
#include "syzygy/tbprobe.h"
|
|
#include "timeman.h"
|
|
#include "types.h"
|
|
#include "uci.h"
|
|
#include "ucioption.h"
|
|
|
|
namespace Stockfish {
|
|
|
|
// Constructor launches the thread and waits until it goes to sleep
|
|
// in idle_loop(). Note that 'searching' and 'exit' should be already set.
|
|
Thread::Thread(Search::SharedState& sharedState,
|
|
std::unique_ptr<Search::ISearchManager> sm,
|
|
size_t n,
|
|
OptionalThreadToNumaNodeBinder binder) :
|
|
idx(n),
|
|
nthreads(sharedState.options["Threads"]),
|
|
stdThread(&Thread::idle_loop, this) {
|
|
|
|
wait_for_search_finished();
|
|
|
|
run_custom_job([this, &binder, &sharedState, &sm, n]() {
|
|
// Use the binder to [maybe] bind the threads to a NUMA node before doing
|
|
// the Worker allocation. Ideally we would also allocate the SearchManager
|
|
// here, but that's minor.
|
|
this->numaAccessToken = binder();
|
|
this->worker =
|
|
std::make_unique<Search::Worker>(sharedState, std::move(sm), n, this->numaAccessToken);
|
|
});
|
|
|
|
wait_for_search_finished();
|
|
}
|
|
|
|
|
|
// Destructor wakes up the thread in idle_loop() and waits
|
|
// for its termination. Thread should be already waiting.
|
|
Thread::~Thread() {
|
|
|
|
assert(!searching);
|
|
|
|
exit = true;
|
|
start_searching();
|
|
stdThread.join();
|
|
}
|
|
|
|
// Wakes up the thread that will start the search
|
|
void Thread::start_searching() {
|
|
assert(worker != nullptr);
|
|
run_custom_job([this]() { worker->start_searching(); });
|
|
}
|
|
|
|
// Clears the histories for the thread worker (usually before a new game)
|
|
void Thread::clear_worker() {
|
|
assert(worker != nullptr);
|
|
run_custom_job([this]() { worker->clear(); });
|
|
}
|
|
|
|
// Blocks on the condition variable until the thread has finished searching
|
|
void Thread::wait_for_search_finished() {
|
|
|
|
std::unique_lock<std::mutex> lk(mutex);
|
|
cv.wait(lk, [&] { return !searching; });
|
|
}
|
|
|
|
// Launching a function in the thread
|
|
void Thread::run_custom_job(std::function<void()> f) {
|
|
{
|
|
std::unique_lock<std::mutex> lk(mutex);
|
|
cv.wait(lk, [&] { return !searching; });
|
|
jobFunc = std::move(f);
|
|
searching = true;
|
|
}
|
|
cv.notify_one();
|
|
}
|
|
|
|
void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); }
|
|
|
|
// Thread gets parked here, blocked on the condition variable
|
|
// when the thread has no work to do.
|
|
|
|
void Thread::idle_loop() {
|
|
while (true)
|
|
{
|
|
std::unique_lock<std::mutex> lk(mutex);
|
|
searching = false;
|
|
cv.notify_one(); // Wake up anyone waiting for search finished
|
|
cv.wait(lk, [&] { return searching; });
|
|
|
|
if (exit)
|
|
return;
|
|
|
|
std::function<void()> job = std::move(jobFunc);
|
|
jobFunc = nullptr;
|
|
|
|
lk.unlock();
|
|
|
|
if (job)
|
|
job();
|
|
}
|
|
}
|
|
|
|
Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); }
|
|
|
|
uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); }
|
|
uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); }
|
|
|
|
// Creates/destroys threads to match the requested number.
|
|
// Created and launched threads will immediately go to sleep in idle_loop.
|
|
// Upon resizing, threads are recreated to allow for binding if necessary.
|
|
void ThreadPool::set(const NumaConfig& numaConfig,
|
|
Search::SharedState sharedState,
|
|
const Search::SearchManager::UpdateContext& updateContext) {
|
|
|
|
if (threads.size() > 0) // destroy any existing thread(s)
|
|
{
|
|
main_thread()->wait_for_search_finished();
|
|
|
|
threads.clear();
|
|
|
|
boundThreadToNumaNode.clear();
|
|
}
|
|
|
|
const size_t requested = sharedState.options["Threads"];
|
|
|
|
if (requested > 0) // create new thread(s)
|
|
{
|
|
// Binding threads may be problematic when there's multiple NUMA nodes and
|
|
// multiple Stockfish instances running. In particular, if each instance
|
|
// runs a single thread then they would all be mapped to the first NUMA node.
|
|
// This is undesirable, and so the default behaviour (i.e. when the user does not
|
|
// change the NumaConfig UCI setting) is to not bind the threads to processors
|
|
// unless we know for sure that we span NUMA nodes and replication is required.
|
|
const std::string numaPolicy(sharedState.options["NumaPolicy"]);
|
|
const bool doBindThreads = [&]() {
|
|
if (numaPolicy == "none")
|
|
return false;
|
|
|
|
if (numaPolicy == "auto")
|
|
return numaConfig.suggests_binding_threads(requested);
|
|
|
|
// numaPolicy == "system", or explicitly set by the user
|
|
return true;
|
|
}();
|
|
|
|
boundThreadToNumaNode = doBindThreads
|
|
? numaConfig.distribute_threads_among_numa_nodes(requested)
|
|
: std::vector<NumaIndex>{};
|
|
|
|
while (threads.size() < requested)
|
|
{
|
|
const size_t threadId = threads.size();
|
|
const NumaIndex numaId = doBindThreads ? boundThreadToNumaNode[threadId] : 0;
|
|
auto manager = threadId == 0 ? std::unique_ptr<Search::ISearchManager>(
|
|
std::make_unique<Search::SearchManager>(updateContext))
|
|
: std::make_unique<Search::NullSearchManager>();
|
|
|
|
// When not binding threads we want to force all access to happen
|
|
// from the same NUMA node, because in case of NUMA replicated memory
|
|
// accesses we don't want to trash cache in case the threads get scheduled
|
|
// on the same NUMA node.
|
|
auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId)
|
|
: OptionalThreadToNumaNodeBinder(numaId);
|
|
|
|
threads.emplace_back(
|
|
std::make_unique<Thread>(sharedState, std::move(manager), threadId, binder));
|
|
}
|
|
|
|
clear();
|
|
|
|
main_thread()->wait_for_search_finished();
|
|
}
|
|
}
|
|
|
|
|
|
// Sets threadPool data to initial values
|
|
void ThreadPool::clear() {
|
|
if (threads.size() == 0)
|
|
return;
|
|
|
|
for (auto&& th : threads)
|
|
th->clear_worker();
|
|
|
|
for (auto&& th : threads)
|
|
th->wait_for_search_finished();
|
|
|
|
// These two affect the time taken on the first move of a game:
|
|
main_manager()->bestPreviousAverageScore = VALUE_INFINITE;
|
|
main_manager()->previousTimeReduction = 0.85;
|
|
|
|
main_manager()->callsCnt = 0;
|
|
main_manager()->bestPreviousScore = VALUE_INFINITE;
|
|
main_manager()->originalTimeAdjust = -1;
|
|
main_manager()->tm.clear();
|
|
}
|
|
|
|
void ThreadPool::run_on_thread(size_t threadId, std::function<void()> f) {
|
|
assert(threads.size() > threadId);
|
|
threads[threadId]->run_custom_job(std::move(f));
|
|
}
|
|
|
|
void ThreadPool::wait_on_thread(size_t threadId) {
|
|
assert(threads.size() > threadId);
|
|
threads[threadId]->wait_for_search_finished();
|
|
}
|
|
|
|
size_t ThreadPool::num_threads() const { return threads.size(); }
|
|
|
|
|
|
// Wakes up main thread waiting in idle_loop() and returns immediately.
|
|
// Main thread will wake up other threads and start the search.
|
|
void ThreadPool::start_thinking(const OptionsMap& options,
|
|
Position& pos,
|
|
StateListPtr& states,
|
|
Search::LimitsType limits) {
|
|
|
|
main_thread()->wait_for_search_finished();
|
|
|
|
main_manager()->stopOnPonderhit = stop = abortedSearch = false;
|
|
main_manager()->ponder = limits.ponderMode;
|
|
|
|
increaseDepth = true;
|
|
|
|
Search::RootMoves rootMoves;
|
|
const auto legalmoves = MoveList<LEGAL>(pos);
|
|
|
|
for (const auto& uciMove : limits.searchmoves)
|
|
{
|
|
auto move = UCIEngine::to_move(pos, uciMove);
|
|
|
|
if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end())
|
|
rootMoves.emplace_back(move);
|
|
}
|
|
|
|
if (rootMoves.empty())
|
|
for (const auto& m : legalmoves)
|
|
rootMoves.emplace_back(m);
|
|
|
|
Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves);
|
|
|
|
// After ownership transfer 'states' becomes empty, so if we stop the search
|
|
// and call 'go' again without setting a new position states.get() == nullptr.
|
|
assert(states.get() || setupStates.get());
|
|
|
|
if (states.get())
|
|
setupStates = std::move(states); // Ownership transfer, states is now empty
|
|
|
|
// We use Position::set() to set root position across threads. But there are
|
|
// some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
|
|
// be deduced from a fen string, so set() clears them and they are set from
|
|
// setupStates->back() later. The rootState is per thread, earlier states are
|
|
// shared since they are read-only.
|
|
for (auto&& th : threads)
|
|
{
|
|
th->run_custom_job([&]() {
|
|
th->worker->limits = limits;
|
|
th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly =
|
|
th->worker->bestMoveChanges = 0;
|
|
th->worker->rootDepth = th->worker->completedDepth = 0;
|
|
th->worker->rootMoves = rootMoves;
|
|
th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
|
|
th->worker->rootState = setupStates->back();
|
|
th->worker->tbConfig = tbConfig;
|
|
});
|
|
}
|
|
|
|
for (auto&& th : threads)
|
|
th->wait_for_search_finished();
|
|
|
|
main_thread()->start_searching();
|
|
}
|
|
|
|
Thread* ThreadPool::get_best_thread() const {
|
|
|
|
Thread* bestThread = threads.front().get();
|
|
Value minScore = VALUE_NONE;
|
|
|
|
std::unordered_map<Move, int64_t, Move::MoveHash> votes(
|
|
2 * std::min(size(), bestThread->worker->rootMoves.size()));
|
|
|
|
// Find the minimum score of all threads
|
|
for (auto&& th : threads)
|
|
minScore = std::min(minScore, th->worker->rootMoves[0].score);
|
|
|
|
// Vote according to score and depth, and select the best thread
|
|
auto thread_voting_value = [minScore](Thread* th) {
|
|
return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth);
|
|
};
|
|
|
|
for (auto&& th : threads)
|
|
votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get());
|
|
|
|
for (auto&& th : threads)
|
|
{
|
|
const auto bestThreadScore = bestThread->worker->rootMoves[0].score;
|
|
const auto newThreadScore = th->worker->rootMoves[0].score;
|
|
|
|
const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv;
|
|
const auto& newThreadPV = th->worker->rootMoves[0].pv;
|
|
|
|
const auto bestThreadMoveVote = votes[bestThreadPV[0]];
|
|
const auto newThreadMoveVote = votes[newThreadPV[0]];
|
|
|
|
const bool bestThreadInProvenWin = bestThreadScore >= VALUE_TB_WIN_IN_MAX_PLY;
|
|
const bool newThreadInProvenWin = newThreadScore >= VALUE_TB_WIN_IN_MAX_PLY;
|
|
|
|
const bool bestThreadInProvenLoss =
|
|
bestThreadScore != -VALUE_INFINITE && bestThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY;
|
|
const bool newThreadInProvenLoss =
|
|
newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY;
|
|
|
|
// We make sure not to pick a thread with truncated principal variation
|
|
const bool betterVotingValue =
|
|
thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
|
|
> thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);
|
|
|
|
if (bestThreadInProvenWin)
|
|
{
|
|
// Make sure we pick the shortest mate / TB conversion
|
|
if (newThreadScore > bestThreadScore)
|
|
bestThread = th.get();
|
|
}
|
|
else if (bestThreadInProvenLoss)
|
|
{
|
|
// Make sure we pick the shortest mated / TB conversion
|
|
if (newThreadInProvenLoss && newThreadScore < bestThreadScore)
|
|
bestThread = th.get();
|
|
}
|
|
else if (newThreadInProvenWin || newThreadInProvenLoss
|
|
|| (newThreadScore > VALUE_TB_LOSS_IN_MAX_PLY
|
|
&& (newThreadMoveVote > bestThreadMoveVote
|
|
|| (newThreadMoveVote == bestThreadMoveVote && betterVotingValue))))
|
|
bestThread = th.get();
|
|
}
|
|
|
|
return bestThread;
|
|
}
|
|
|
|
|
|
// Start non-main threads.
|
|
// Will be invoked by main thread after it has started searching.
|
|
void ThreadPool::start_searching() {
|
|
|
|
for (auto&& th : threads)
|
|
if (th != threads.front())
|
|
th->start_searching();
|
|
}
|
|
|
|
|
|
// Wait for non-main threads
|
|
void ThreadPool::wait_for_search_finished() const {
|
|
|
|
for (auto&& th : threads)
|
|
if (th != threads.front())
|
|
th->wait_for_search_finished();
|
|
}
|
|
|
|
std::vector<size_t> ThreadPool::get_bound_thread_count_by_numa_node() const {
|
|
std::vector<size_t> counts;
|
|
|
|
if (!boundThreadToNumaNode.empty())
|
|
{
|
|
NumaIndex highestNumaNode = 0;
|
|
for (NumaIndex n : boundThreadToNumaNode)
|
|
if (n > highestNumaNode)
|
|
highestNumaNode = n;
|
|
|
|
counts.resize(highestNumaNode + 1, 0);
|
|
|
|
for (NumaIndex n : boundThreadToNumaNode)
|
|
counts[n] += 1;
|
|
}
|
|
|
|
return counts;
|
|
}
|
|
|
|
void ThreadPool::ensure_network_replicated() {
|
|
for (auto&& th : threads)
|
|
th->ensure_network_replicated();
|
|
}
|
|
|
|
} // namespace Stockfish
|