From 2d668a3cfcf1a8c276f8e2fd4204a53bdc23227a Mon Sep 17 00:00:00 2001 From: mbootsector Date: Tue, 6 Oct 2015 08:15:17 +0200 Subject: [PATCH] Lazy smp Start all threads searching on root position and use only the shared TT table as synching scheme. It seems this scheme scales better than YBWC for high number of threads. Tested at very LTC (120+0.1) with 23 threads ELO: 35.52 +-9.6 (95%) LOS: 100.0% Total: 1109 W: 183 L: 70 D: 856 Tested at LTC with 23 threads ELO: 34.41 +-9.9 (95%) LOS: 100.0% Total: 1094 W: 184 L: 76 D: 834 Tested at LTC with 7 threads ELO: 8.76 +-5.0 (95%) LOS: 100.0% Total: 5000 W: 735 L: 609 D: 3656 Tested at STC with 7 threads ELO: 16.76 +-5.4 (95%) LOS: 100.0% Total: 5000 W: 899 L: 658 D: 3443 Bench: 8397672 --- src/benchmark.cpp | 2 +- src/movepick.cpp | 7 - src/search.cpp | 781 +++++++++++++++++++--------------------------- src/search.h | 2 - src/thread.cpp | 161 +--------- src/thread.h | 73 +---- src/timeman.h | 3 +- 7 files changed, 345 insertions(+), 684 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 5c4c4e36..c683a381 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -158,7 +158,7 @@ void benchmark(const Position& current, istream& is) { Search::StateStackPtr st; Threads.start_thinking(pos, limits, st); Threads.main()->join(); - nodes += Search::RootPos.nodes_searched(); + nodes += Threads.nodes_searched(); } } diff --git a/src/movepick.cpp b/src/movepick.cpp index 7cf3e607..1f01aaaf 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -320,10 +320,3 @@ Move MovePicker::next_move() { } } } - - -/// Version of next_move() to use at split point nodes where the move is grabbed -/// from the split point's shared MovePicker object. This function is not thread -/// safe so must be lock protected by the caller. -template<> -Move MovePicker::next_move() { return ss->splitPoint->movePicker->next_move(); } diff --git a/src/search.cpp b/src/search.cpp index 4988dc8e..61796406 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -39,9 +39,8 @@ namespace Search { volatile SignalsType Signals; LimitsType Limits; - RootMoveVector RootMoves; - Position RootPos; StateStackPtr SetupStates; + CounterMovesHistoryStats CounterMovesHistory; } namespace Tablebases { @@ -107,7 +106,7 @@ namespace { assert(newPv.size() >= 3); - // Keep track of how many times in a row 3rd ply remains stable + // Keep track of how many times in a row 3rd ply remains stable stableCnt = (newPv[2] == pv[2]) ? stableCnt + 1 : 0; if (!std::equal(newPv.begin(), newPv.begin() + 3, pv)) @@ -128,21 +127,17 @@ namespace { Move pv[3]; }; - size_t PVIdx; EasyMoveManager EasyMove; double BestMoveChanges; Value DrawValue[COLOR_NB]; - HistoryStats History; - CounterMovesHistoryStats CounterMovesHistory; - MovesStats Countermoves; - template + template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); template Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth); - void id_loop(Position& pos); + void id_loop(); Value value_to_tt(Value v, int ply); Value value_from_tt(Value v, int ply); void update_pv(Move* pv, Move move, Move* childPv); @@ -185,9 +180,12 @@ void Search::init() { void Search::reset () { TT.clear(); - History.clear(); CounterMovesHistory.clear(); - Countermoves.clear(); + + for (Thread* th : Threads) { + th->History.clear(); + th->Countermoves.clear(); + } } @@ -227,8 +225,9 @@ template uint64_t Search::perft(Position& pos, Depth depth); void Search::think() { - Color us = RootPos.side_to_move(); - Time.init(Limits, us, RootPos.game_ply(), now()); + MainThread* mth = Threads.main(); // Shorthand + Color us = mth->pos.side_to_move(); + Time.init(Limits, us, mth->pos.game_ply(), now()); int contempt = Options["Contempt"] * PawnValueEg / 100; // From centipawns DrawValue[ us] = VALUE_DRAW - Value(contempt); @@ -247,21 +246,21 @@ void Search::think() { TB::ProbeDepth = DEPTH_ZERO; } - if (RootMoves.empty()) + if (mth->rootMoves.empty()) { - RootMoves.push_back(RootMove(MOVE_NONE)); + mth->rootMoves.push_back(RootMove(MOVE_NONE)); sync_cout << "info depth 0 score " - << UCI::value(RootPos.checkers() ? -VALUE_MATE : VALUE_DRAW) + << UCI::value(mth->pos.checkers() ? -VALUE_MATE : VALUE_DRAW) << sync_endl; } else { - if (TB::Cardinality >= RootPos.count(WHITE) - + RootPos.count(BLACK)) + if (TB::Cardinality >= mth->pos.count(WHITE) + + mth->pos.count(BLACK)) { // If the current root position is in the tablebases then RootMoves // contains only moves that preserve the draw or win. - TB::RootInTB = Tablebases::root_probe(RootPos, RootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe(mth->pos, mth->rootMoves, TB::Score); if (TB::RootInTB) TB::Cardinality = 0; // Do not probe tablebases during the search @@ -269,7 +268,7 @@ void Search::think() { else // If DTZ tables are missing, use WDL tables as a fallback { // Filter out moves that do not preserve a draw or win - TB::RootInTB = Tablebases::root_probe_wdl(RootPos, RootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe_wdl(mth->pos, mth->rootMoves, TB::Score); // Only probe during search if winning if (TB::Score <= VALUE_DRAW) @@ -278,7 +277,7 @@ void Search::think() { if (TB::RootInTB) { - TB::Hits = RootMoves.size(); + TB::Hits = mth->rootMoves.size(); if (!TB::UseRule50) TB::Score = TB::Score > VALUE_DRAW ? VALUE_MATE - MAX_PLY - 1 @@ -287,16 +286,23 @@ void Search::think() { } } + // Prepare the threads. for (Thread* th : Threads) { th->maxPly = 0; + th->rootDepth = DEPTH_ZERO; + if (th != mth) { + Position pos(mth->pos, th); + th->pos = pos; + th->rootMoves = mth->rootMoves; + } th->notify_one(); // Wake up all the threads } Threads.timer->run = true; Threads.timer->notify_one(); // Start the recurring timer - id_loop(RootPos); // Let's start searching ! + id_loop(); // Let's start searching ! Threads.timer->run = false; } @@ -304,7 +310,7 @@ void Search::think() { // When playing in 'nodes as time' mode, subtract the searched nodes from // the available ones before to exit. if (Limits.npmsec) - Time.availableNodes += Limits.inc[us] - RootPos.nodes_searched(); + Time.availableNodes += Limits.inc[us] - Threads.nodes_searched(); // When we reach the maximum depth, we can arrive here without a raise of // Signals.stop. However, if we are pondering or in an infinite search, @@ -314,194 +320,265 @@ void Search::think() { if (!Signals.stop && (Limits.ponder || Limits.infinite)) { Signals.stopOnPonderhit = true; - RootPos.this_thread()->wait_for(Signals.stop); + mth->pos.this_thread()->wait_for(Signals.stop); } - sync_cout << "bestmove " << UCI::move(RootMoves[0].pv[0], RootPos.is_chess960()); + sync_cout << "bestmove " << UCI::move(mth->rootMoves[0].pv[0], mth->pos.is_chess960()); - if (RootMoves[0].pv.size() > 1 || RootMoves[0].extract_ponder_from_tt(RootPos)) - std::cout << " ponder " << UCI::move(RootMoves[0].pv[1], RootPos.is_chess960()); + if (mth->rootMoves[0].pv.size() > 1 || mth->rootMoves[0].extract_ponder_from_tt(mth->pos)) + std::cout << " ponder " << UCI::move(mth->rootMoves[0].pv[1], mth->pos.is_chess960()); std::cout << sync_endl; + } +// Thread::id_loop() is the main iterative deepening loop. It calls search() repeatedly +// with increasing depth until the allocated thinking time has been consumed, +// user stops the search, or the maximum search depth is reached. + +void Thread::id_loop() { + + Threads.main()->mutex.lock(); + Threads.main()->slavesMask.set(idx); + Threads.main()->mutex.unlock(); + + Value bestValue, alpha, beta, delta; + + Move easyMove = EasyMove.get(pos.key()); + EasyMove.clear(); + + Stack *ss = stack+2; // To allow referencing (ss-2) and (ss+2) + std::memset(stack, 0, 5 * sizeof(Stack)); + + bestValue = delta = alpha = -VALUE_INFINITE; + beta = VALUE_INFINITE; + + size_t multiPV = Options["MultiPV"]; + Skill skill(Options["Skill Level"]); + + // When playing with strength handicap enable MultiPV search that we will + // use behind the scenes to retrieve a set of possible moves. + if (skill.enabled()) + multiPV = std::max(multiPV, (size_t)4); + + multiPV = std::min(multiPV, rootMoves.size()); + + // Iterative deepening loop until requested to stop or target depth reached + while (true) + { + // Set up our new depth. + + // The main thread modifies other threads rootDepth, + // if it is <= main thread depth. The new depth will take effect + // after the other thread returns to id_loop(). + if (this == Threads.main()) { + ++rootDepth; + for (Thread* th : Threads) { + if (th != this) { + if (th->rootDepth <= rootDepth) { + th->rootDepth = rootDepth + (Depth)(((th->idx - 1) / 2) + 1); + } + } + } + } + else { + // This can cause a thread to search with the same depth for many iterations. + rootDepth = Threads.main()->rootDepth + (Depth)(((this->idx - 1) / 2) + 1); + } + + if (rootDepth >= DEPTH_MAX || Signals.stop || (Limits.depth && rootDepth > Limits.depth)) + break; + + // Age out PV variability metric + if (this == Threads.main()) + BestMoveChanges *= 0.5; + + // Save the last iteration's scores before first PV line is searched and + // all the move scores except the (new) PV are set to -VALUE_INFINITE. + for (RootMove& rm : rootMoves) + rm.previousScore = rm.score; + + // MultiPV loop. We perform a full root search for each PV line + for (PVIdx = 0; PVIdx < multiPV && !Signals.stop; ++PVIdx) + { + // Reset aspiration window starting size + if (rootDepth >= 5 * ONE_PLY) + { + delta = Value(16); + alpha = std::max(rootMoves[PVIdx].previousScore - delta,-VALUE_INFINITE); + beta = std::min(rootMoves[PVIdx].previousScore + delta, VALUE_INFINITE); + } +/* + sync_cout << "*T" << idx << " - I'm starting iteration " << rootDepth << "."; + std::cout << " We are searching "; + for (Thread* th : Threads) + std::cout << th->rootDepth << " "; + std::cout << "." << sync_endl; +*/ + // Start with a small aspiration window and, in the case of a fail + // high/low, re-search with a bigger window until we're not failing + // high/low anymore. + while (true) + { + bestValue = search(pos, ss, alpha, beta, rootDepth, false); + + // Bring the best move to the front. It is critical that sorting + // is done with a stable algorithm because all the values but the + // first and eventually the new best one are set to -VALUE_INFINITE + // and we want to keep the same order for all the moves except the + // new PV that goes to the front. Note that in case of MultiPV + // search the already searched PV lines are preserved. + + std::stable_sort(rootMoves.begin() + PVIdx, rootMoves.end()); + + // Write PV back to transposition table in case the relevant + // entries have been overwritten during the search. + for (size_t i = 0; i <= PVIdx; ++i) + rootMoves[i].insert_pv_in_tt(pos); + + // If search has been stopped break immediately. Sorting and + // writing PV back to TT is safe because RootMoves is still + // valid, although it refers to previous iteration. + if (Signals.stop) + break; + + if (this == Threads.main()) { + // When failing high/low give some update (without cluttering + // the UI) before a re-search. + if (multiPV == 1 + && (bestValue <= alpha || bestValue >= beta) + && Time.elapsed() > 3000) + sync_cout << UCI::pv(pos, rootDepth, alpha, beta) << sync_endl; + } + + // In case of failing low/high increase aspiration window and + // re-search, otherwise exit the loop. + if (bestValue <= alpha) + { + beta = (alpha + beta) / 2; + alpha = std::max(bestValue - delta, -VALUE_INFINITE); + if (this == Threads.main()) { + Signals.failedLowAtRoot = true; + Signals.stopOnPonderhit = false; + } + } + else if (bestValue >= beta) + { + alpha = (alpha + beta) / 2; + beta = std::min(bestValue + delta, VALUE_INFINITE); + } + else + break; + + delta += delta / 2; + + assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); + } + + // Sort the PV lines searched so far and update the GUI + std::stable_sort(rootMoves.begin(), rootMoves.begin() + PVIdx + 1); + + if (this != Threads.main()) + break; + + if (Signals.stop) + sync_cout << "info nodes " << Threads.nodes_searched() + << " time " << Time.elapsed() << sync_endl; + + else if (PVIdx + 1 == multiPV || Time.elapsed() > 3000) + sync_cout << UCI::pv(pos, rootDepth, alpha, beta) << sync_endl; + } + + if (this != Threads.main()) + continue; + + // If skill level is enabled and time is up, pick a sub-optimal best move + if (skill.enabled() && skill.time_to_pick(rootDepth)) + skill.pick_best(multiPV); + + // Have we found a "mate in x"? + if ( Limits.mate + && bestValue >= VALUE_MATE_IN_MAX_PLY + && VALUE_MATE - bestValue <= 2 * Limits.mate) + Signals.stop = true; + + // Do we have time for the next iteration? Can we stop searching now? + if (Limits.use_time_management()) + { + if (!Signals.stop && !Signals.stopOnPonderhit) + { + // Take some extra time if the best move has changed + if (rootDepth > 4 * ONE_PLY && multiPV == 1) + Time.pv_instability(BestMoveChanges); + + // Stop the search if only one legal move is available or all + // of the available time has been used or we matched an easyMove + // from the previous search and just did a fast verification. + if ( rootMoves.size() == 1 + || Time.elapsed() > Time.available() + || ( rootMoves[0].pv[0] == easyMove + && BestMoveChanges < 0.03 + && Time.elapsed() > Time.available() / 10)) + { + // If we are allowed to ponder do not stop the search now but + // keep pondering until the GUI sends "ponderhit" or "stop". + if (Limits.ponder) + Signals.stopOnPonderhit = true; + else + Signals.stop = true; + } + } + + if (rootMoves[0].pv.size() >= 3) + EasyMove.update(pos, rootMoves[0].pv); + else + EasyMove.clear(); + } + } + + searching = false; + + Threads.main()->mutex.lock(); + Threads.main()->slavesMask.reset(idx); + Threads.main()->mutex.unlock(); +} + + + namespace { - // id_loop() is the main iterative deepening loop. It calls search() repeatedly - // with increasing depth until the allocated thinking time has been consumed, - // user stops the search, or the maximum search depth is reached. + // id_loop() calls Thread::id_loop() - void id_loop(Position& pos) { + void id_loop() { - Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2) - Depth depth; - Value bestValue, alpha, beta, delta; - - Move easyMove = EasyMove.get(pos.key()); - EasyMove.clear(); - - std::memset(ss-2, 0, 5 * sizeof(Stack)); - - depth = DEPTH_ZERO; BestMoveChanges = 0; - bestValue = delta = alpha = -VALUE_INFINITE; - beta = VALUE_INFINITE; TT.new_search(); - size_t multiPV = Options["MultiPV"]; - Skill skill(Options["Skill Level"]); + // Start search for the other threads. + for (Thread* th : Threads) + th->searching = true; - // When playing with strength handicap enable MultiPV search that we will - // use behind the scenes to retrieve a set of possible moves. - if (skill.enabled()) - multiPV = std::max(multiPV, (size_t)4); + Threads.main()->id_loop(); - multiPV = std::min(multiPV, RootMoves.size()); + // Force a quicker exit of fixed depth searches. + Signals.stop = true; - // Iterative deepening loop until requested to stop or target depth reached - while (++depth < DEPTH_MAX && !Signals.stop && (!Limits.depth || depth <= Limits.depth)) - { - // Age out PV variability metric - BestMoveChanges *= 0.5; - - // Save the last iteration's scores before first PV line is searched and - // all the move scores except the (new) PV are set to -VALUE_INFINITE. - for (RootMove& rm : RootMoves) - rm.previousScore = rm.score; - - // MultiPV loop. We perform a full root search for each PV line - for (PVIdx = 0; PVIdx < multiPV && !Signals.stop; ++PVIdx) - { - // Reset aspiration window starting size - if (depth >= 5 * ONE_PLY) - { - delta = Value(16); - alpha = std::max(RootMoves[PVIdx].previousScore - delta,-VALUE_INFINITE); - beta = std::min(RootMoves[PVIdx].previousScore + delta, VALUE_INFINITE); - } - - // Start with a small aspiration window and, in the case of a fail - // high/low, re-search with a bigger window until we're not failing - // high/low anymore. - while (true) - { - bestValue = search(pos, ss, alpha, beta, depth, false); - - // Bring the best move to the front. It is critical that sorting - // is done with a stable algorithm because all the values but the - // first and eventually the new best one are set to -VALUE_INFINITE - // and we want to keep the same order for all the moves except the - // new PV that goes to the front. Note that in case of MultiPV - // search the already searched PV lines are preserved. - std::stable_sort(RootMoves.begin() + PVIdx, RootMoves.end()); - - // Write PV back to transposition table in case the relevant - // entries have been overwritten during the search. - for (size_t i = 0; i <= PVIdx; ++i) - RootMoves[i].insert_pv_in_tt(pos); - - // If search has been stopped break immediately. Sorting and - // writing PV back to TT is safe because RootMoves is still - // valid, although it refers to previous iteration. - if (Signals.stop) - break; - - // When failing high/low give some update (without cluttering - // the UI) before a re-search. - if ( multiPV == 1 - && (bestValue <= alpha || bestValue >= beta) - && Time.elapsed() > 3000) - sync_cout << UCI::pv(pos, depth, alpha, beta) << sync_endl; - - // In case of failing low/high increase aspiration window and - // re-search, otherwise exit the loop. - if (bestValue <= alpha) - { - beta = (alpha + beta) / 2; - alpha = std::max(bestValue - delta, -VALUE_INFINITE); - - Signals.failedLowAtRoot = true; - Signals.stopOnPonderhit = false; - } - else if (bestValue >= beta) - { - alpha = (alpha + beta) / 2; - beta = std::min(bestValue + delta, VALUE_INFINITE); - } - else - break; - - delta += delta / 2; - - assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); - } - - // Sort the PV lines searched so far and update the GUI - std::stable_sort(RootMoves.begin(), RootMoves.begin() + PVIdx + 1); - - if (Signals.stop) - sync_cout << "info nodes " << RootPos.nodes_searched() - << " time " << Time.elapsed() << sync_endl; - - else if (PVIdx + 1 == multiPV || Time.elapsed() > 3000) - sync_cout << UCI::pv(pos, depth, alpha, beta) << sync_endl; - } - - // If skill level is enabled and time is up, pick a sub-optimal best move - if (skill.enabled() && skill.time_to_pick(depth)) - skill.pick_best(multiPV); - - // Have we found a "mate in x"? - if ( Limits.mate - && bestValue >= VALUE_MATE_IN_MAX_PLY - && VALUE_MATE - bestValue <= 2 * Limits.mate) - Signals.stop = true; - - // Do we have time for the next iteration? Can we stop searching now? - if (Limits.use_time_management()) - { - if (!Signals.stop && !Signals.stopOnPonderhit) - { - // Take some extra time if the best move has changed - if (depth > 4 * ONE_PLY && multiPV == 1) - Time.pv_instability(BestMoveChanges); - - // Stop the search if only one legal move is available or all - // of the available time has been used or we matched an easyMove - // from the previous search and just did a fast verification. - if ( RootMoves.size() == 1 - || Time.elapsed() > Time.available() - || ( RootMoves[0].pv[0] == easyMove - && BestMoveChanges < 0.03 - && Time.elapsed() > Time.available() / 10)) - { - // If we are allowed to ponder do not stop the search now but - // keep pondering until the GUI sends "ponderhit" or "stop". - if (Limits.ponder) - Signals.stopOnPonderhit = true; - else - Signals.stop = true; - } - } - - if (RootMoves[0].pv.size() >= 3) - EasyMove.update(pos, RootMoves[0].pv); - else - EasyMove.clear(); - } - } + // Wait until all threads have finished. + while (Threads.main()->slavesMask != 0) {} // Clear any candidate easy move that wasn't stable for the last search // iterations; the second condition prevents consecutive fast moves. if (EasyMove.stableCnt < 6 || Time.elapsed() < Time.available()) EasyMove.clear(); + size_t multiPV = Options["MultiPV"]; + Skill skill(Options["Skill Level"]); // If skill level is enabled, swap best PV line with the sub-optimal one if (skill.enabled()) - std::swap(RootMoves[0], *std::find(RootMoves.begin(), - RootMoves.end(), skill.best_move(multiPV))); + std::swap(Threads.main()->rootMoves[0], *std::find(Threads.main()->rootMoves.begin(), + Threads.main()->rootMoves.end(), skill.best_move(multiPV))); } @@ -512,7 +589,7 @@ namespace { // repeat all this work again. We also don't need to store anything to the hash // table here: This is taken care of after we return from the split point. - template + template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { const bool RootNode = NT == Root; @@ -525,7 +602,6 @@ namespace { Move pv[MAX_PLY+1], quietsSearched[64]; StateInfo st; TTEntry* tte; - SplitPoint* splitPoint; Key posKey; Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth, predictedDepth; @@ -537,22 +613,6 @@ namespace { // Step 1. Initialize node Thread* thisThread = pos.this_thread(); inCheck = pos.checkers(); - - if (SpNode) - { - splitPoint = ss->splitPoint; - bestMove = splitPoint->bestMove; - bestValue = splitPoint->bestValue; - tte = nullptr; - ttHit = false; - ttMove = excludedMove = MOVE_NONE; - ttValue = VALUE_NONE; - - assert(splitPoint->bestValue > -VALUE_INFINITE && splitPoint->moveCount > 0); - - goto moves_loop; - } - moveCount = quietCount = ss->moveCount = 0; bestValue = -VALUE_INFINITE; ss->ply = (ss-1)->ply + 1; @@ -591,7 +651,7 @@ namespace { excludedMove = ss->excludedMove; posKey = excludedMove ? pos.exclusion_key() : pos.key(); tte = TT.probe(posKey, ttHit); - ss->ttMove = ttMove = RootNode ? RootMoves[PVIdx].pv[0] : ttHit ? tte->move() : MOVE_NONE; + ss->ttMove = ttMove = RootNode ? thisThread->rootMoves[thisThread->PVIdx].pv[0] : ttHit ? tte->move() : MOVE_NONE; ttValue = ttHit ? value_from_tt(tte->value(), ss->ply) : VALUE_NONE; // At non-PV nodes we check for a fail high/low. We don't prune at PV nodes @@ -710,7 +770,7 @@ namespace { pos.do_null_move(st); (ss+1)->skipEarlyPruning = true; nullValue = depth-R < ONE_PLY ? -qsearch(pos, ss+1, -beta, -beta+1, DEPTH_ZERO) - : - search(pos, ss+1, -beta, -beta+1, depth-R, !cutNode); + : - search(pos, ss+1, -beta, -beta+1, depth-R, !cutNode); (ss+1)->skipEarlyPruning = false; pos.undo_null_move(); @@ -726,7 +786,7 @@ namespace { // Do verification search at high depths ss->skipEarlyPruning = true; Value v = depth-R < ONE_PLY ? qsearch(pos, ss, beta-1, beta, DEPTH_ZERO) - : search(pos, ss, beta-1, beta, depth-R, false); + : search(pos, ss, beta-1, beta, depth-R, false); ss->skipEarlyPruning = false; if (v >= beta) @@ -749,7 +809,7 @@ namespace { assert((ss-1)->currentMove != MOVE_NONE); assert((ss-1)->currentMove != MOVE_NULL); - MovePicker mp(pos, ttMove, History, CounterMovesHistory, PieceValue[MG][pos.captured_piece_type()]); + MovePicker mp(pos, ttMove, thisThread->History, CounterMovesHistory, PieceValue[MG][pos.captured_piece_type()]); CheckInfo ci(pos); while ((move = mp.next_move()) != MOVE_NONE) @@ -757,7 +817,7 @@ namespace { { ss->currentMove = move; pos.do_move(move, st, pos.gives_check(move, ci)); - value = -search(pos, ss+1, -rbeta, -rbeta+1, rdepth, !cutNode); + value = -search(pos, ss+1, -rbeta, -rbeta+1, rdepth, !cutNode); pos.undo_move(move); if (value >= rbeta) return value; @@ -771,19 +831,19 @@ namespace { { Depth d = depth - 2 * ONE_PLY - (PvNode ? DEPTH_ZERO : depth / 4); ss->skipEarlyPruning = true; - search(pos, ss, alpha, beta, d, true); + search(pos, ss, alpha, beta, d, true); ss->skipEarlyPruning = false; tte = TT.probe(posKey, ttHit); ttMove = ttHit ? tte->move() : MOVE_NONE; } -moves_loop: // When in check and at SpNode search starts from here +moves_loop: // When in check search starts from here Square prevMoveSq = to_sq((ss-1)->currentMove); - Move countermove = Countermoves[pos.piece_on(prevMoveSq)][prevMoveSq]; + Move countermove = thisThread->Countermoves[pos.piece_on(prevMoveSq)][prevMoveSq]; - MovePicker mp(pos, ttMove, depth, History, CounterMovesHistory, countermove, ss); + MovePicker mp(pos, ttMove, depth, thisThread->History, CounterMovesHistory, countermove, ss); CheckInfo ci(pos); value = bestValue; // Workaround a bogus 'uninitialized' warning under gcc improving = ss->staticEval >= (ss-2)->staticEval @@ -791,7 +851,6 @@ moves_loop: // When in check and at SpNode search starts from here ||(ss-2)->staticEval == VALUE_NONE; singularExtensionNode = !RootNode - && !SpNode && depth >= 8 * ONE_PLY && ttMove != MOVE_NONE /* && ttValue != VALUE_NONE Already implicit in the next condition */ @@ -802,7 +861,7 @@ moves_loop: // When in check and at SpNode search starts from here // Step 11. Loop through moves // Loop through all pseudo-legal moves until no moves remain or a beta cutoff occurs - while ((move = mp.next_move()) != MOVE_NONE) + while ((move = mp.next_move()) != MOVE_NONE) { assert(is_ok(move)); @@ -812,29 +871,19 @@ moves_loop: // When in check and at SpNode search starts from here // At root obey the "searchmoves" option and skip moves not listed in Root // Move List. As a consequence any illegal move is also skipped. In MultiPV // mode we also skip PV moves which have been already searched. - if (RootNode && !std::count(RootMoves.begin() + PVIdx, RootMoves.end(), move)) + if (RootNode && !std::count(thisThread->rootMoves.begin() + thisThread->PVIdx, thisThread->rootMoves.end(), move)) continue; - if (SpNode) - { - // Shared counter cannot be decremented later if the move turns out to be illegal - if (!pos.legal(move, ci.pinned)) - continue; + ss->moveCount = ++moveCount; - ss->moveCount = moveCount = ++splitPoint->moveCount; - splitPoint->spinlock.release(); - } - else - ss->moveCount = ++moveCount; - - if (RootNode) + if (RootNode && thisThread == Threads.main()) { Signals.firstRootMove = (moveCount == 1); - if (thisThread == Threads.main() && Time.elapsed() > 3000) + if (Time.elapsed() > 3000) sync_cout << "info depth " << depth / ONE_PLY << " currmove " << UCI::move(move, pos.is_chess960()) - << " currmovenumber " << moveCount + PVIdx << sync_endl; + << " currmovenumber " << moveCount + thisThread->PVIdx << sync_endl; } if (PvNode) @@ -864,7 +913,7 @@ moves_loop: // When in check and at SpNode search starts from here Value rBeta = ttValue - 2 * depth / ONE_PLY; ss->excludedMove = move; ss->skipEarlyPruning = true; - value = search(pos, ss, rBeta - 1, rBeta, depth / 2, cutNode); + value = search(pos, ss, rBeta - 1, rBeta, depth / 2, cutNode); ss->skipEarlyPruning = false; ss->excludedMove = MOVE_NONE; @@ -887,9 +936,6 @@ moves_loop: // When in check and at SpNode search starts from here if ( depth < 16 * ONE_PLY && moveCount >= FutilityMoveCounts[improving][depth]) { - if (SpNode) - splitPoint->spinlock.acquire(); - continue; } @@ -903,13 +949,6 @@ moves_loop: // When in check and at SpNode search starts from here if (futilityValue <= alpha) { bestValue = std::max(bestValue, futilityValue); - - if (SpNode) - { - splitPoint->spinlock.acquire(); - if (bestValue > splitPoint->bestValue) - splitPoint->bestValue = bestValue; - } continue; } } @@ -917,9 +956,6 @@ moves_loop: // When in check and at SpNode search starts from here // Prune moves with negative SEE at low depths if (predictedDepth < 4 * ONE_PLY && pos.see_sign(move) < VALUE_ZERO) { - if (SpNode) - splitPoint->spinlock.acquire(); - continue; } } @@ -928,7 +964,7 @@ moves_loop: // When in check and at SpNode search starts from here prefetch(TT.first_entry(pos.key_after(move))); // Check for legality just before making the move - if (!RootNode && !SpNode && !pos.legal(move, ci.pinned)) + if (!RootNode && !pos.legal(move, ci.pinned)) { ss->moveCount = --moveCount; continue; @@ -950,12 +986,12 @@ moves_loop: // When in check and at SpNode search starts from here ss->reduction = reduction(improving, depth, moveCount); if ( (!PvNode && cutNode) - || ( History[pos.piece_on(to_sq(move))][to_sq(move)] < VALUE_ZERO + || ( thisThread->History[pos.piece_on(to_sq(move))][to_sq(move)] < VALUE_ZERO && CounterMovesHistory[pos.piece_on(prevMoveSq)][prevMoveSq] [pos.piece_on(to_sq(move))][to_sq(move)] <= VALUE_ZERO)) ss->reduction += ONE_PLY; - if ( History[pos.piece_on(to_sq(move))][to_sq(move)] > VALUE_ZERO + if ( thisThread->History[pos.piece_on(to_sq(move))][to_sq(move)] > VALUE_ZERO && CounterMovesHistory[pos.piece_on(prevMoveSq)][prevMoveSq] [pos.piece_on(to_sq(move))][to_sq(move)] > VALUE_ZERO) ss->reduction = std::max(DEPTH_ZERO, ss->reduction - ONE_PLY); @@ -968,10 +1004,8 @@ moves_loop: // When in check and at SpNode search starts from here ss->reduction = std::max(DEPTH_ZERO, ss->reduction - ONE_PLY); Depth d = std::max(newDepth - ss->reduction, ONE_PLY); - if (SpNode) - alpha = splitPoint->alpha; - value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); + value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); doFullDepthSearch = (value > alpha && ss->reduction != DEPTH_ZERO); ss->reduction = DEPTH_ZERO; @@ -982,13 +1016,10 @@ moves_loop: // When in check and at SpNode search starts from here // Step 16. Full depth search, when LMR is skipped or fails high if (doFullDepthSearch) { - if (SpNode) - alpha = splitPoint->alpha; - value = newDepth < ONE_PLY ? givesCheck ? -qsearch(pos, ss+1, -(alpha+1), -alpha, DEPTH_ZERO) : -qsearch(pos, ss+1, -(alpha+1), -alpha, DEPTH_ZERO) - : - search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); + : - search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); } // For PV nodes only, do a full PV search on the first move or after a fail @@ -1002,7 +1033,7 @@ moves_loop: // When in check and at SpNode search starts from here value = newDepth < ONE_PLY ? givesCheck ? -qsearch(pos, ss+1, -beta, -alpha, DEPTH_ZERO) : -qsearch(pos, ss+1, -beta, -alpha, DEPTH_ZERO) - : - search(pos, ss+1, -beta, -alpha, newDepth, false); + : - search(pos, ss+1, -beta, -alpha, newDepth, false); } // Step 17. Undo move @@ -1011,22 +1042,15 @@ moves_loop: // When in check and at SpNode search starts from here assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); // Step 18. Check for new best move - if (SpNode) - { - splitPoint->spinlock.acquire(); - bestValue = splitPoint->bestValue; - alpha = splitPoint->alpha; - } - // Finished searching the move. If a stop or a cutoff occurred, the return // value of the search cannot be trusted, and we return immediately without // updating best move, PV and TT. - if (Signals.stop || thisThread->cutoff_occurred()) + if (Signals.stop) return VALUE_ZERO; if (RootNode) { - RootMove& rm = *std::find(RootMoves.begin(), RootMoves.end(), move); + RootMove& rm = *std::find(thisThread->rootMoves.begin(), thisThread->rootMoves.end(), move); // PV move or new best move ? if (moveCount == 1 || value > alpha) @@ -1042,7 +1066,7 @@ moves_loop: // When in check and at SpNode search starts from here // We record how often the best move has been changed in each // iteration. This information is used for time management: When // the best move changes frequently, we allocate some more time. - if (moveCount > 1) + if (moveCount > 1 && thisThread == Threads.main()) ++BestMoveChanges; } else @@ -1054,7 +1078,7 @@ moves_loop: // When in check and at SpNode search starts from here if (value > bestValue) { - bestValue = SpNode ? splitPoint->bestValue = value : value; + bestValue = value; if (value > alpha) { @@ -1064,54 +1088,25 @@ moves_loop: // When in check and at SpNode search starts from here && (move != EasyMove.get(pos.key()) || moveCount > 1)) EasyMove.clear(); - bestMove = SpNode ? splitPoint->bestMove = move : move; + bestMove = move; if (PvNode && !RootNode) // Update pv even in fail-high case - update_pv(SpNode ? splitPoint->ss->pv : ss->pv, move, (ss+1)->pv); + update_pv(ss->pv, move, (ss+1)->pv); if (PvNode && value < beta) // Update alpha! Always alpha < beta - alpha = SpNode ? splitPoint->alpha = value : value; + alpha = value; else { assert(value >= beta); // Fail high - - if (SpNode) - splitPoint->cutoff = true; - break; } } } - if (!SpNode && !captureOrPromotion && move != bestMove && quietCount < 64) + if (!captureOrPromotion && move != bestMove && quietCount < 64) quietsSearched[quietCount++] = move; - - // Step 19. Check for splitting the search - if ( !SpNode - && Threads.size() >= 2 - && depth >= Threads.minimumSplitDepth - && ( !thisThread->activeSplitPoint - || !thisThread->activeSplitPoint->allSlavesSearching - || ( Threads.size() > MAX_SLAVES_PER_SPLITPOINT - && thisThread->activeSplitPoint->slavesMask.count() == MAX_SLAVES_PER_SPLITPOINT)) - && thisThread->splitPointsSize < MAX_SPLITPOINTS_PER_THREAD) - { - assert(bestValue > -VALUE_INFINITE && bestValue < beta); - - thisThread->split(pos, ss, alpha, beta, &bestValue, &bestMove, - depth, moveCount, &mp, NT, cutNode); - - if (Signals.stop || thisThread->cutoff_occurred()) - return VALUE_ZERO; - - if (bestValue >= beta) - break; - } } - if (SpNode) - return bestValue; - // Following condition would detect a stop or a cutoff set only after move // loop has been completed. But in this case bestValue is valid because we // have fully searched our subtree, and we can anyhow save the result in TT. @@ -1262,7 +1257,7 @@ moves_loop: // When in check and at SpNode search starts from here // to search the moves. Because the depth is <= 0 here, only captures, // queen promotions and checks (only if depth >= DEPTH_QS_CHECKS) will // be generated. - MovePicker mp(pos, ttMove, depth, History, CounterMovesHistory, to_sq((ss-1)->currentMove)); + MovePicker mp(pos, ttMove, depth, pos.this_thread()->History, CounterMovesHistory, to_sq((ss-1)->currentMove)); CheckInfo ci(pos); // Loop through the moves until no moves remain or a beta cutoff occurs @@ -1413,23 +1408,25 @@ moves_loop: // When in check and at SpNode search starts from here ss->killers[0] = move; } + Thread *th = pos.this_thread(); // Shorthand + Value bonus = Value((depth / ONE_PLY) * (depth / ONE_PLY)); Square prevSq = to_sq((ss-1)->currentMove); HistoryStats& cmh = CounterMovesHistory[pos.piece_on(prevSq)][prevSq]; - History.updateH(pos.moved_piece(move), to_sq(move), bonus); + th->History.updateH(pos.moved_piece(move), to_sq(move), bonus); if (is_ok((ss-1)->currentMove)) { - Countermoves.update(pos.piece_on(prevSq), prevSq, move); + th->Countermoves.update(pos.piece_on(prevSq), prevSq, move); cmh.updateCMH(pos.moved_piece(move), to_sq(move), bonus); } // Decrease all the other played quiet moves for (int i = 0; i < quietsCnt; ++i) { - History.updateH(pos.moved_piece(quiets[i]), to_sq(quiets[i]), -bonus); + th->History.updateH(pos.moved_piece(quiets[i]), to_sq(quiets[i]), -bonus); if (is_ok((ss-1)->currentMove)) cmh.updateCMH(pos.moved_piece(quiets[i]), to_sq(quiets[i]), -bonus); @@ -1451,10 +1448,11 @@ moves_loop: // When in check and at SpNode search starts from here Move Skill::pick_best(size_t multiPV) { // PRNG sequence should be non-deterministic, so we seed it with the time at init + MainThread* mth = Threads.main(); // Shorthand static PRNG rng(now()); // RootMoves are already sorted by score in descending order - int variance = std::min(RootMoves[0].score - RootMoves[multiPV - 1].score, PawnValueMg); + int variance = std::min(mth->rootMoves[0].score - mth->rootMoves[multiPV - 1].score, PawnValueMg); int weakness = 120 - 2 * level; int maxScore = -VALUE_INFINITE; @@ -1464,13 +1462,13 @@ moves_loop: // When in check and at SpNode search starts from here for (size_t i = 0; i < multiPV; ++i) { // This is our magic formula - int push = ( weakness * int(RootMoves[0].score - RootMoves[i].score) + int push = ( weakness * int(mth->rootMoves[0].score - mth->rootMoves[i].score) + variance * (rng.rand() % weakness)) / 128; - if (RootMoves[i].score + push > maxScore) + if (mth->rootMoves[i].score + push > maxScore) { - maxScore = RootMoves[i].score + push; - best = RootMoves[i].pv[0]; + maxScore = mth->rootMoves[i].score + push; + best = mth->rootMoves[i].pv[0]; } } return best; @@ -1484,24 +1482,22 @@ moves_loop: // When in check and at SpNode search starts from here string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { + Thread* thisThread = pos.this_thread(); std::stringstream ss; int elapsed = Time.elapsed() + 1; - size_t multiPV = std::min((size_t)Options["MultiPV"], RootMoves.size()); - int selDepth = 0; - - for (Thread* th : Threads) - if (th->maxPly > selDepth) - selDepth = th->maxPly; + size_t multiPV = std::min((size_t)Options["MultiPV"], thisThread->rootMoves.size()); + int selDepth = thisThread->maxPly; + uint64_t nodes = Threads.nodes_searched(); for (size_t i = 0; i < multiPV; ++i) { - bool updated = (i <= PVIdx); + bool updated = (i <= thisThread->PVIdx); if (depth == ONE_PLY && !updated) continue; Depth d = updated ? depth : depth - ONE_PLY; - Value v = updated ? RootMoves[i].score : RootMoves[i].previousScore; + Value v = updated ? thisThread->rootMoves[i].score : thisThread->rootMoves[i].previousScore; bool tb = TB::RootInTB && abs(v) < VALUE_MATE - MAX_PLY; v = tb ? TB::Score : v; @@ -1515,11 +1511,11 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { << " multipv " << i + 1 << " score " << UCI::value(v); - if (!tb && i == PVIdx) + if (!tb && i == thisThread->PVIdx) ss << (v >= beta ? " lowerbound" : v <= alpha ? " upperbound" : ""); - ss << " nodes " << pos.nodes_searched() - << " nps " << pos.nodes_searched() * 1000 / elapsed; + ss << " nodes " << nodes + << " nps " << nodes * 1000 / elapsed; if (elapsed > 1000) // Earlier makes little sense ss << " hashfull " << TT.hashfull(); @@ -1528,7 +1524,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { << " time " << elapsed << " pv"; - for (Move m : RootMoves[i].pv) + for (Move m : thisThread->rootMoves[i].pv) ss << " " << UCI::move(m, pos.is_chess960()); } @@ -1593,140 +1589,24 @@ bool RootMove::extract_ponder_from_tt(Position& pos) void Thread::idle_loop() { - // Pointer 'this_sp' is not null only if we are called from split(), and not - // at the thread creation. This means we are the split point's master. - SplitPoint* this_sp = activeSplitPoint; - - assert(!this_sp || (this_sp->master == this && searching)); - - while (!exit && !(this_sp && this_sp->slavesMask.none())) + while (!exit) { // If this thread has been assigned work, launch a search - while (searching) - { - spinlock.acquire(); + if (searching) + this->id_loop(); - assert(activeSplitPoint); - SplitPoint* sp = activeSplitPoint; - - spinlock.release(); - - Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2) - Position pos(*sp->pos, this); - - std::memcpy(ss-2, sp->ss-2, 5 * sizeof(Stack)); - ss->splitPoint = sp; - - sp->spinlock.acquire(); - - assert(activePosition == nullptr); - - activePosition = &pos; - - if (sp->nodeType == NonPV) - search(pos, ss, sp->alpha, sp->beta, sp->depth, sp->cutNode); - - else if (sp->nodeType == PV) - search(pos, ss, sp->alpha, sp->beta, sp->depth, sp->cutNode); - - else if (sp->nodeType == Root) - search(pos, ss, sp->alpha, sp->beta, sp->depth, sp->cutNode); - - else - assert(false); - - assert(searching); - - spinlock.acquire(); - - searching = false; - activePosition = nullptr; - - spinlock.release(); - - sp->slavesMask.reset(idx); - sp->allSlavesSearching = false; - sp->nodes += pos.nodes_searched(); - - // After releasing the lock we can't access any SplitPoint related data - // in a safe way because it could have been released under our feet by - // the sp master. - sp->spinlock.release(); - - // Try to late join to another split point if none of its slaves has - // already finished. - SplitPoint* bestSp = NULL; - int minLevel = INT_MAX; - - for (Thread* th : Threads) - { - const size_t size = th->splitPointsSize; // Local copy - sp = size ? &th->splitPoints[size - 1] : nullptr; - - if ( sp - && sp->allSlavesSearching - && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT - && can_join(sp)) - { - assert(this != th); - assert(!(this_sp && this_sp->slavesMask.none())); - assert(Threads.size() > 2); - - // Prefer to join to SP with few parents to reduce the probability - // that a cut-off occurs above us, and hence we waste our work. - int level = 0; - for (SplitPoint* p = th->activeSplitPoint; p; p = p->parentSplitPoint) - level++; - - if (level < minLevel) - { - bestSp = sp; - minLevel = level; - } - } - } - - if (bestSp) - { - sp = bestSp; - - // Recheck the conditions under lock protection - sp->spinlock.acquire(); - - if ( sp->allSlavesSearching - && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT) - { - spinlock.acquire(); - - if (can_join(sp)) - { - sp->slavesMask.set(idx); - activeSplitPoint = sp; - searching = true; - } - - spinlock.release(); - } - - sp->spinlock.release(); - } - } - - // If search is finished then sleep, otherwise just yield + // If search is finished then sleep if (!Threads.main()->thinking) { - assert(!this_sp); - std::unique_lock lk(mutex); while (!exit && !Threads.main()->thinking) sleepCondition.wait(lk); } - else - std::this_thread::yield(); // Wait for a new job or for our slaves to finish } } + /// check_time() is called by the timer thread when the timer triggers. It is /// used to print debug info and, more importantly, to detect when we are out of /// available time and thus stop the search. @@ -1761,28 +1641,7 @@ void check_time() { else if (Limits.nodes) { - int64_t nodes = RootPos.nodes_searched(); - - // Loop across all split points and sum accumulated SplitPoint nodes plus - // all the currently active positions nodes. - // FIXME: Racy... - for (Thread* th : Threads) - for (size_t i = 0; i < th->splitPointsSize; ++i) - { - SplitPoint& sp = th->splitPoints[i]; - - sp.spinlock.acquire(); - - nodes += sp.nodes; - - for (size_t idx = 0; idx < Threads.size(); ++idx) - if (sp.slavesMask.test(idx) && Threads[idx]->activePosition) - nodes += Threads[idx]->activePosition->nodes_searched(); - - sp.spinlock.release(); - } - - if (nodes >= Limits.nodes) + if ((int64_t)Threads.nodes_searched() >= Limits.nodes) Signals.stop = true; } } diff --git a/src/search.h b/src/search.h index 5ba95fe0..eaf978ec 100644 --- a/src/search.h +++ b/src/search.h @@ -101,8 +101,6 @@ typedef std::unique_ptr> StateStackPtr; extern volatile SignalsType Signals; extern LimitsType Limits; -extern RootMoveVector RootMoves; -extern Position RootPos; extern StateStackPtr SetupStates; void init(); diff --git a/src/thread.cpp b/src/thread.cpp index cdb0d541..f23d7820 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -82,143 +82,10 @@ Thread::Thread() /* : splitPoints() */ { // Initialization of non POD broken in searching = false; maxPly = 0; - splitPointsSize = 0; - activeSplitPoint = nullptr; - activePosition = nullptr; idx = Threads.size(); // Starts from 0 } -// Thread::cutoff_occurred() checks whether a beta cutoff has occurred in the -// current active split point, or in some ancestor of the split point. - -bool Thread::cutoff_occurred() const { - - for (SplitPoint* sp = activeSplitPoint; sp; sp = sp->parentSplitPoint) - if (sp->cutoff) - return true; - - return false; -} - - -// Thread::can_join() checks whether the thread is available to join the split -// point 'sp'. An obvious requirement is that thread must be idle. With more than -// two threads, this is not sufficient: If the thread is the master of some split -// point, it is only available as a slave for the split points below his active -// one (the "helpful master" concept in YBWC terminology). - -bool Thread::can_join(const SplitPoint* sp) const { - - if (searching) - return false; - - // Make a local copy to be sure it doesn't become zero under our feet while - // testing next condition and so leading to an out of bounds access. - const size_t size = splitPointsSize; - - // No split points means that the thread is available as a slave for any - // other thread otherwise apply the "helpful master" concept if possible. - return !size || splitPoints[size - 1].slavesMask.test(sp->master->idx); -} - - -// Thread::split() does the actual work of distributing the work at a node between -// several available threads. If it does not succeed in splitting the node -// (because no idle threads are available), the function immediately returns. -// If splitting is possible, a SplitPoint object is initialized with all the -// data that must be copied to the helper threads and then helper threads are -// informed that they have been assigned work. This will cause them to instantly -// leave their idle loops and call search(). When all threads have returned from -// search() then split() returns. - -void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bestValue, - Move* bestMove, Depth depth, int moveCount, - MovePicker* movePicker, int nodeType, bool cutNode) { - - assert(searching); - assert(-VALUE_INFINITE < *bestValue && *bestValue <= alpha && alpha < beta && beta <= VALUE_INFINITE); - assert(depth >= Threads.minimumSplitDepth); - assert(splitPointsSize < MAX_SPLITPOINTS_PER_THREAD); - - // Pick and init the next available split point - SplitPoint& sp = splitPoints[splitPointsSize]; - - sp.spinlock.acquire(); // No contention here until we don't increment splitPointsSize - - sp.master = this; - sp.parentSplitPoint = activeSplitPoint; - sp.slavesMask = 0, sp.slavesMask.set(idx); - sp.depth = depth; - sp.bestValue = *bestValue; - sp.bestMove = *bestMove; - sp.alpha = alpha; - sp.beta = beta; - sp.nodeType = nodeType; - sp.cutNode = cutNode; - sp.movePicker = movePicker; - sp.moveCount = moveCount; - sp.pos = &pos; - sp.nodes = 0; - sp.cutoff = false; - sp.ss = ss; - sp.allSlavesSearching = true; // Must be set under lock protection - - ++splitPointsSize; - activeSplitPoint = &sp; - activePosition = nullptr; - - // Try to allocate available threads - Thread* slave; - - while ( sp.slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT - && (slave = Threads.available_slave(&sp)) != nullptr) - { - slave->spinlock.acquire(); - - if (slave->can_join(activeSplitPoint)) - { - activeSplitPoint->slavesMask.set(slave->idx); - slave->activeSplitPoint = activeSplitPoint; - slave->searching = true; - } - - slave->spinlock.release(); - } - - // Everything is set up. The master thread enters the idle loop, from which - // it will instantly launch a search, because its 'searching' flag is set. - // The thread will return from the idle loop when all slaves have finished - // their work at this split point. - sp.spinlock.release(); - - Thread::idle_loop(); // Force a call to base class idle_loop() - - // In the helpful master concept, a master can help only a sub-tree of its - // split point and because everything is finished here, it's not possible - // for the master to be booked. - assert(!searching); - assert(!activePosition); - - // We have returned from the idle loop, which means that all threads are - // finished. Note that decreasing splitPointsSize must be done under lock - // protection to avoid a race with Thread::can_join(). - spinlock.acquire(); - - searching = true; - --splitPointsSize; - activeSplitPoint = sp.parentSplitPoint; - activePosition = &pos; - - spinlock.release(); - - // Split point data cannot be changed now, so no need to lock protect - pos.set_nodes_searched(pos.nodes_searched() + sp.nodes); - *bestMove = sp.bestMove; - *bestValue = sp.bestValue; -} - - // TimerThread::idle_loop() is where the timer thread waits Resolution milliseconds // and then calls check_time(). When not searching, thread sleeps until it's woken up. @@ -260,13 +127,7 @@ void MainThread::idle_loop() { if (!exit) { - searching = true; - Search::think(); - - assert(searching); - - searching = false; } } } @@ -317,7 +178,6 @@ void ThreadPool::exit() { void ThreadPool::read_uci_options() { - minimumSplitDepth = Options["Min Split Depth"] * ONE_PLY; size_t requested = Options["Threads"]; assert(requested > 0); @@ -333,16 +193,14 @@ void ThreadPool::read_uci_options() { } -// ThreadPool::available_slave() tries to find an idle thread which is available -// to join SplitPoint 'sp'. +// ThreadPool::nodes_searched() returns the number of nodes searched. -Thread* ThreadPool::available_slave(const SplitPoint* sp) const { +uint64_t ThreadPool::nodes_searched() { - for (Thread* th : *this) - if (th->can_join(sp)) - return th; - - return nullptr; + uint64_t nodes = 0; + for (Thread *th : *this) + nodes += th->pos.nodes_searched(); + return nodes; } @@ -351,13 +209,14 @@ Thread* ThreadPool::available_slave(const SplitPoint* sp) const { void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits, StateStackPtr& states) { + main()->join(); Signals.stopOnPonderhit = Signals.firstRootMove = false; Signals.stop = Signals.failedLowAtRoot = false; - RootMoves.clear(); - RootPos = pos; + main()->rootMoves.clear(); + main()->pos = pos; Limits = limits; if (states.get()) // If we don't set a new position, preserve current state { @@ -368,7 +227,7 @@ void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits, for (const auto& m : MoveList(pos)) if ( limits.searchmoves.empty() || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m)) - RootMoves.push_back(RootMove(m)); + main()->rootMoves.push_back(RootMove(m)); main()->thinking = true; main()->notify_one(); // Wake up main thread: 'thinking' must be already set diff --git a/src/thread.h b/src/thread.h index e880b01c..f88dc11f 100644 --- a/src/thread.h +++ b/src/thread.h @@ -37,53 +37,6 @@ struct Thread; const size_t MAX_THREADS = 128; -const size_t MAX_SPLITPOINTS_PER_THREAD = 8; -const size_t MAX_SLAVES_PER_SPLITPOINT = 4; - -class Spinlock { - - std::atomic_int lock; - -public: - Spinlock() { lock = 1; } // Init here to workaround a bug with MSVC 2013 - void acquire() { - while (lock.fetch_sub(1, std::memory_order_acquire) != 1) - while (lock.load(std::memory_order_relaxed) <= 0) - std::this_thread::yield(); // Be nice to hyperthreading - } - void release() { lock.store(1, std::memory_order_release); } -}; - - -/// SplitPoint struct stores information shared by the threads searching in -/// parallel below the same split point. It is populated at splitting time. - -struct SplitPoint { - - // Const data after split point has been setup - const Position* pos; - Search::Stack* ss; - Thread* master; - Depth depth; - Value beta; - int nodeType; - bool cutNode; - - // Const pointers to shared data - MovePicker* movePicker; - SplitPoint* parentSplitPoint; - - // Shared variable data - Spinlock spinlock; - std::bitset slavesMask; - volatile bool allSlavesSearching; - volatile uint64_t nodes; - volatile Value alpha; - volatile Value bestValue; - volatile Move bestMove; - volatile int moveCount; - volatile bool cutoff; -}; /// ThreadBase struct is the base of the hierarchy from where we derive all the @@ -97,7 +50,6 @@ struct ThreadBase : public std::thread { void wait_for(volatile const bool& b); Mutex mutex; - Spinlock spinlock; ConditionVariable sleepCondition; volatile bool exit = false; }; @@ -112,22 +64,22 @@ struct Thread : public ThreadBase { Thread(); virtual void idle_loop(); - bool cutoff_occurred() const; - bool can_join(const SplitPoint* sp) const; + virtual void id_loop(); - void split(Position& pos, Search::Stack* ss, Value alpha, Value beta, Value* bestValue, Move* bestMove, - Depth depth, int moveCount, MovePicker* movePicker, int nodeType, bool cutNode); - - SplitPoint splitPoints[MAX_SPLITPOINTS_PER_THREAD]; Pawns::Table pawnsTable; Material::Table materialTable; Endgames endgames; - Position* activePosition; - size_t idx; + size_t idx, PVIdx; int maxPly; - SplitPoint* volatile activeSplitPoint; - volatile size_t splitPointsSize; volatile bool searching; + + // Data per thread. + Position pos; + Search::RootMoveVector rootMoves; + Search::Stack stack[MAX_PLY+4]; + HistoryStats History; + MovesStats Countermoves; + Depth rootDepth; }; @@ -138,6 +90,7 @@ struct MainThread : public Thread { virtual void idle_loop(); void join(); volatile bool thinking = true; // Avoid a race with start_thinking() + std::bitset slavesMask; }; struct TimerThread : public ThreadBase { @@ -161,10 +114,8 @@ struct ThreadPool : public std::vector { MainThread* main() { return static_cast(at(0)); } void read_uci_options(); - Thread* available_slave(const SplitPoint* sp) const; void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&); - - Depth minimumSplitDepth; + uint64_t nodes_searched(); TimerThread* timer; }; diff --git a/src/timeman.h b/src/timeman.h index c5390bef..24632d79 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -22,6 +22,7 @@ #include "misc.h" #include "search.h" +#include "thread.h" /// The TimeManagement class computes the optimal time to think depending on /// the maximum available time, the game move number and other parameters. @@ -32,7 +33,7 @@ public: void pv_instability(double bestMoveChanges) { unstablePvFactor = 1 + bestMoveChanges; } int available() const { return int(optimumTime * unstablePvFactor * 0.76); } int maximum() const { return maximumTime; } - int elapsed() const { return int(Search::Limits.npmsec ? Search::RootPos.nodes_searched() : now() - start); } + int elapsed() const { return int(Search::Limits.npmsec ? Threads.nodes_searched() : now() - start); } int64_t availableNodes; // When in 'nodes as time' mode