/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

#include "thread.h"

#include <algorithm>
#include <cassert>
#include <deque>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

#include "movegen.h"
#include "search.h"
#include "syzygy/tbprobe.h"
#include "timeman.h"
#include "types.h"
#include "uci.h"
#include "ucioption.h"

namespace Stockfish {

// Constructor launches the thread and waits until it goes to sleep
// in idle_loop(). Note that 'searching' and 'exit' should be already set.
Thread::Thread(Search::SharedState&                    sharedState,
               std::unique_ptr<Search::ISearchManager> sm,
               size_t                                  n,
               OptionalThreadToNumaNodeBinder          binder) :
    idx(n),
    nthreads(sharedState.options["Threads"]),
    stdThread(&Thread::idle_loop, this) {

    wait_for_search_finished();

    run_custom_job([this, &binder, &sharedState, &sm, n]() {
        // Use the binder to [maybe] bind the threads to a NUMA node before doing
        // the Worker allocation. Ideally we would also allocate the SearchManager
        // here, but that's minor.
        this->numaAccessToken = binder();
        this->worker =
          std::make_unique<Search::Worker>(sharedState, std::move(sm), n, this->numaAccessToken);
    });

    wait_for_search_finished();
}

// Destructor wakes up the thread in idle_loop() and waits
// for its termination. Thread should be already waiting.
Thread::~Thread() {

    assert(!searching);

    exit = true;
    start_searching();
    stdThread.join();
}

// Wakes up the thread that will start the search
void Thread::start_searching() {
    assert(worker != nullptr);
    run_custom_job([this]() { worker->start_searching(); });
}

// Wakes up the thread that will clear its associated worker
void Thread::clear_worker() {
    assert(worker != nullptr);
    run_custom_job([this]() { worker->clear(); });
}

// Blocks on the condition variable
// until the thread has finished searching.
void Thread::wait_for_search_finished() {

    std::unique_lock<std::mutex> lk(mutex);
    cv.wait(lk, [&] { return !searching; });
}

// Hands a custom job to the thread: waits until the thread is idle,
// installs the job and wakes the thread up so idle_loop() can run it.
void Thread::run_custom_job(std::function<void()> f) {
    {
        std::unique_lock<std::mutex> lk(mutex);
        cv.wait(lk, [&] { return !searching; });
        jobFunc   = std::move(f);
        searching = true;
    }
    cv.notify_one();
}

// Thread gets parked here, blocked on the
// condition variable, when it has no work to do.
void Thread::idle_loop() {
    while (true)
    {
        std::unique_lock<std::mutex> lk(mutex);
        searching = false;
        cv.notify_one();  // Wake up anyone waiting for search finished
        cv.wait(lk, [&] { return searching; });

        if (exit)
            return;

        std::function<void()> job = std::move(jobFunc);
        jobFunc                   = nullptr;

        lk.unlock();

        if (job)
            job();
    }
}

Search::SearchManager* ThreadPool::main_manager() {
    return static_cast<Search::SearchManager*>(main_thread()->worker.get()->manager.get());
}

uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); }
uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); }
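
// Usage sketch: the custom-job machinery above is what ThreadPool::run_on_thread()
// and ThreadPool::wait_on_thread() (further below) build on. Assuming a ThreadPool
// instance named 'pool' (hypothetical name, for illustration only), dispatching work
// to a specific thread and waiting for it would look like:
//
//     pool.run_on_thread(0, [] { /* runs inside idle_loop() on thread 0 */ });
//     pool.wait_on_thread(0);  // blocks until the job above has completed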
// Creates/destroys threads to match the requested number.
// Created and launched threads will immediately go to sleep in idle_loop.
// Upon resizing, threads are recreated to allow for binding if necessary.
void ThreadPool::set(const NumaConfig&                           numaConfig,
                     Search::SharedState                         sharedState,
                     const Search::SearchManager::UpdateContext& updateContext) {

    if (threads.size() > 0)  // destroy any existing thread(s)
    {
        main_thread()->wait_for_search_finished();

        threads.clear();
        boundThreadToNumaNode.clear();
    }

    const size_t requested = sharedState.options["Threads"];

    if (requested > 0)  // create new thread(s)
    {
        // Binding threads may be problematic when there are multiple NUMA nodes and
        // multiple Stockfish instances running. In particular, if each instance
        // runs a single thread then they would all be mapped to the first NUMA node.
        // This is undesirable, so the default behaviour (i.e. when the user does not
        // change the NumaPolicy UCI setting) is to not bind the threads to processors
        // unless we know for sure that we span NUMA nodes and replication is required.
        const std::string numaPolicy(sharedState.options["NumaPolicy"]);
        const bool        doBindThreads = [&]() {
            if (numaPolicy == "none")
                return false;

            if (numaPolicy == "auto")
                return numaConfig.suggests_binding_threads(requested);

            // numaPolicy == "system", or explicitly set by the user
            return true;
        }();

        boundThreadToNumaNode = doBindThreads
                                ? numaConfig.distribute_threads_among_numa_nodes(requested)
                                : std::vector<NumaIndex>{};

        while (threads.size() < requested)
        {
            const size_t    threadId = threads.size();
            const NumaIndex numaId   = doBindThreads ? boundThreadToNumaNode[threadId] : 0;
            auto            manager  = threadId == 0
                                       ? std::unique_ptr<Search::ISearchManager>(
                                           std::make_unique<Search::SearchManager>(updateContext))
                                       : std::make_unique<Search::NullSearchManager>();

            // When not binding threads we want to force all access to happen
            // from the same NUMA node, because in the case of NUMA-replicated memory
            // accesses we don't want to trash the cache if the threads get scheduled
            // on the same NUMA node.
            auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId)
                                        : OptionalThreadToNumaNodeBinder(numaId);

            threads.emplace_back(
              std::make_unique<Thread>(sharedState, std::move(manager), threadId, binder));
        }

        clear();

        main_thread()->wait_for_search_finished();
    }
}

// Sets threadPool data to initial values
void ThreadPool::clear() {
    if (threads.size() == 0)
        return;

    for (auto&& th : threads)
        th->clear_worker();

    for (auto&& th : threads)
        th->wait_for_search_finished();

    main_manager()->callsCnt                 = 0;
    main_manager()->bestPreviousScore        = VALUE_INFINITE;
    main_manager()->bestPreviousAverageScore = VALUE_INFINITE;
    main_manager()->originalPly              = -1;
    main_manager()->originalTimeAdjust       = -1;
    main_manager()->previousTimeReduction    = 1.0;
    main_manager()->tm.clear();
}

void ThreadPool::run_on_thread(size_t threadId, std::function<void()> f) {
    assert(threads.size() > threadId);
    threads[threadId]->run_custom_job(std::move(f));
}

void ThreadPool::wait_on_thread(size_t threadId) {
    assert(threads.size() > threadId);
    threads[threadId]->wait_for_search_finished();
}

size_t ThreadPool::num_threads() const { return threads.size(); }
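
// For reference, a sketch of how the policy examined in set() above is selected from
// a standard UCI session (assumed GUI/CLI input, not code from this file). Values
// other than "none" and "auto" are treated as an explicit request to bind, as the
// lambda in set() shows:
//
//     setoption name NumaPolicy value none    // never bind threads
//     setoption name NumaPolicy value auto    // bind only when the NUMA config suggests it
//     setoption name NumaPolicy value system  // or another explicit value: always bind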
// Wakes up the main thread waiting in idle_loop() and returns immediately.
// The main thread will wake up the other threads and start the search.
void ThreadPool::start_thinking(const OptionsMap&  options,
                                Position&          pos,
                                StateListPtr&      states,
                                Search::LimitsType limits) {

    main_thread()->wait_for_search_finished();

    main_manager()->stopOnPonderhit = stop = abortedSearch = false;
    main_manager()->ponder                                 = limits.ponderMode;

    increaseDepth = true;

    Search::RootMoves rootMoves;
    const auto        legalmoves = MoveList<LEGAL>(pos);

    for (const auto& uciMove : limits.searchmoves)
    {
        auto move = UCIEngine::to_move(pos, uciMove);

        if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end())
            rootMoves.emplace_back(move);
    }

    if (rootMoves.empty())
        for (const auto& m : legalmoves)
            rootMoves.emplace_back(m);

    Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves);

    // After ownership transfer 'states' becomes empty, so if we stop the search
    // and call 'go' again without setting a new position, states.get() == nullptr.
    assert(states.get() || setupStates.get());

    if (states.get())
        setupStates = std::move(states);  // Ownership transfer, states is now empty

    // We use Position::set() to set the root position across threads. However, there
    // are some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
    // be deduced from a FEN string, so set() clears them and they are set from
    // setupStates->back() later. The rootState is per-thread; earlier states are
    // shared since they are read-only.
    for (auto&& th : threads)
    {
        th->run_custom_job([&]() {
            th->worker->limits = limits;
            th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly =
              th->worker->bestMoveChanges = 0;
            th->worker->rootDepth = th->worker->completedDepth = 0;
            th->worker->rootMoves                              = rootMoves;
            th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
            th->worker->rootState = setupStates->back();
            th->worker->tbConfig  = tbConfig;
        });
    }

    for (auto&& th : threads)
        th->wait_for_search_finished();

    main_thread()->start_searching();
}

Thread* ThreadPool::get_best_thread() const {

    Thread* bestThread = threads.front().get();
    Value   minScore   = VALUE_NONE;

    std::unordered_map<Move, int64_t, Move::MoveHash> votes(
      2 * std::min(size(), bestThread->worker->rootMoves.size()));

    // Find the minimum score of all threads
    for (auto&& th : threads)
        minScore = std::min(minScore, th->worker->rootMoves[0].score);

    // Vote according to score and depth, and select the best thread
    auto thread_voting_value = [minScore](Thread* th) {
        return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth);
    };

    for (auto&& th : threads)
        votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get());

    for (auto&& th : threads)
    {
        const auto bestThreadScore = bestThread->worker->rootMoves[0].score;
        const auto newThreadScore  = th->worker->rootMoves[0].score;

        const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv;
        const auto& newThreadPV  = th->worker->rootMoves[0].pv;

        const auto bestThreadMoveVote = votes[bestThreadPV[0]];
        const auto newThreadMoveVote  = votes[newThreadPV[0]];

        const bool bestThreadInProvenWin = bestThreadScore >= VALUE_TB_WIN_IN_MAX_PLY;
        const bool newThreadInProvenWin  = newThreadScore >= VALUE_TB_WIN_IN_MAX_PLY;

        const bool bestThreadInProvenLoss =
          bestThreadScore != -VALUE_INFINITE && bestThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY;
        const bool newThreadInProvenLoss =
          newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY;
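
        // Illustrative magnitude (assumed numbers, not taken from the engine): with
        // minScore == -20, a thread reporting score 10 at completedDepth 25 contributes
        // (10 - (-20) + 14) * 25 == 1100 votes to its PV move, so deeper and
        // better-scoring threads carry proportionally more weight in the comparison below.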
        // Note that we make sure not to pick a thread with a truncated PV,
        // for a better viewer experience.
        const bool betterVotingValue =
          thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
          > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);

        if (bestThreadInProvenWin)
        {
            // Make sure we pick the shortest mate / TB conversion
            if (newThreadScore > bestThreadScore)
                bestThread = th.get();
        }
        else if (bestThreadInProvenLoss)
        {
            // Make sure we pick the shortest mated / TB conversion
            if (newThreadInProvenLoss && newThreadScore < bestThreadScore)
                bestThread = th.get();
        }
        else if (newThreadInProvenWin || newThreadInProvenLoss
                 || (newThreadScore > VALUE_TB_LOSS_IN_MAX_PLY
                     && (newThreadMoveVote > bestThreadMoveVote
                         || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue))))
            bestThread = th.get();
    }

    return bestThread;
}

// Start non-main threads.
// Will be invoked by the main thread after it has started searching.
void ThreadPool::start_searching() {

    for (auto&& th : threads)
        if (th != threads.front())
            th->start_searching();
}

// Wait for non-main threads
void ThreadPool::wait_for_search_finished() const {

    for (auto&& th : threads)
        if (th != threads.front())
            th->wait_for_search_finished();
}

std::vector<size_t> ThreadPool::get_bound_thread_count_by_numa_node() const {
    std::vector<size_t> counts;

    if (!boundThreadToNumaNode.empty())
    {
        NumaIndex highestNumaNode = 0;
        for (NumaIndex n : boundThreadToNumaNode)
            if (n > highestNumaNode)
                highestNumaNode = n;

        counts.resize(highestNumaNode + 1, 0);

        for (NumaIndex n : boundThreadToNumaNode)
            counts[n] += 1;
    }

    return counts;
}

}  // namespace Stockfish