From 2d5e248f58595c81c1d075f5874e4c18ca8b1998 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Tue, 7 May 2024 15:03:58 +0300 Subject: [PATCH 001/315] Tweak reduction formula based on depth The idea came to me by checking for trends from the megafauzi tunes, since the values of the divisor for this specific formula were as follows: stc: 15990 mtc: 16117 ltc: 14805 vltc: 12719 new vltc passed by Muzhen: 12076 This shows a clear trend related to time control, the higher it is, the lower the optimum value for the divisor seems to be. So I tried a simple formula, using educated guesses based on some calculations, tests show it works pretty fine, and it can still be further tuned at VLTC in the future to scale even better. Passed STC: LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 431360 W: 110791 L: 109898 D: 210671 Ptnml(0-2): 1182, 50846, 110698, 51805, 1149 https://tests.stockfishchess.org/tests/view/663770409819650825aa269f Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 114114 W: 29109 L: 28625 D: 56380 Ptnml(0-2): 105, 12628, 31101, 13124, 99 https://tests.stockfishchess.org/tests/view/66378c099819650825aa73f6 https://github.com/official-stockfish/Stockfish/pull/5223 bench: 2273551 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 6830e4b1..2c3fc56e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1154,7 +1154,7 @@ moves_loop: // When in check, search starts here + (*contHist[3])[movedPiece][move.to_sq()] - 5078; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / 12076; + r -= ss->statScore / std::max(21000 - (depth * 305), 12000); // Step 17. 
Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1 + rootNode) From 3bdfa0fb4a837f51f142cc1e862837c6f9167796 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Tue, 7 May 2024 15:03:58 +0300 Subject: [PATCH 002/315] Depth dependent statscore based reductions Test a modification of Fawzi's PR #5223, against that PR. parameters locally tuned with nevergrad4sf. passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 1047424 W: 271478 L: 269649 D: 506297 Ptnml(0-2): 3851, 124543, 265290, 125982, 4046 https://tests.stockfishchess.org/tests/view/663b0889ca93dad645f7c58c passed LTC: LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 796236 W: 201712 L: 199825 D: 394699 Ptnml(0-2): 361, 88381, 218778, 90206, 392 https://tests.stockfishchess.org/tests/view/663be6adca93dad645f7f509 https://github.com/official-stockfish/Stockfish/pull/5228 Bench: 3346224 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 2c3fc56e..3eec00b0 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1154,7 +1154,7 @@ moves_loop: // When in check, search starts here + (*contHist[3])[movedPiece][move.to_sq()] - 5078; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / std::max(21000 - (depth * 305), 12000); + r -= ss->statScore / (17662 - std::min(depth, 16) * 105); // Step 17. Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1 + rootNode) From d1b8d8bab377eb873385bb4f8662062398f16686 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Wed, 8 May 2024 21:59:03 +0300 Subject: [PATCH 003/315] Refactor quiet moves pruning in qsearch Make its formula more in line with what we use in search: the current formula is more or less the one we used years ago for search, which has since been reworked. This patch changes the qsearch formula to be almost exactly the same as the one used in search, i.e. the sum of conthist 0, conthist 1 and pawn structure history.
Passed STC: https://tests.stockfishchess.org/tests/view/6639c8421343f0cb16716206 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 84992 W: 22414 L: 22019 D: 40559 Ptnml(0-2): 358, 9992, 21440, 10309, 397 Passed LTC: LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 119136 W: 30407 L: 29916 D: 58813 Ptnml(0-2): 46, 13192, 32622, 13641, 67 closes https://github.com/official-stockfish/Stockfish/pull/5224 Bench: 2138659 --- src/search.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 3eec00b0..633f9b51 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1570,8 +1570,9 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, break; // Continuation history based pruning (~3 Elo) - if (!capture && (*contHist[0])[pos.moved_piece(move)][move.to_sq()] < 0 - && (*contHist[1])[pos.moved_piece(move)][move.to_sq()] < 0) + if (!capture && (*contHist[0])[pos.moved_piece(move)][move.to_sq()] + + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] + + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)][move.to_sq()] <= 4000) continue; // Do not search moves with bad enough SEE values (~5 Elo) From db147fe2586527a854516016699949af53dc5b17 Mon Sep 17 00:00:00 2001 From: rn5f107s2 Date: Wed, 8 May 2024 22:08:56 +0200 Subject: [PATCH 004/315] IIR on cutnodes if there is a ttMove but the ttBound is upper If there is an upper bound stored in the transposition table, but we still have a ttMove, the upperbound indicates that the last time the ttMove was tried, it failed low. This fail low indicates that the ttMove may not be good, so this patch introduces a depth reduction of one for cutnodes with such ttMoves. 
Passed STC: https://tests.stockfishchess.org/tests/view/663be4d1ca93dad645f7f45f LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 139424 W: 35900 L: 35433 D: 68091 Ptnml(0-2): 425, 16357, 35743, 16700, 487 Passed LTC: https://tests.stockfishchess.org/tests/view/663bec95ca93dad645f7f5c8 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 129690 W: 32902 L: 32390 D: 64398 Ptnml(0-2): 63, 14304, 35610, 14794, 74 closes https://github.com/official-stockfish/Stockfish/pull/5227 bench 2257437 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 633f9b51..767ea238 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -828,8 +828,8 @@ Value Search::Worker::search( return qsearch(pos, ss, alpha, beta); // For cutNodes without a ttMove, we decrease depth by 2 if depth is high enough. - if (cutNode && depth >= 8 && !ttMove) - depth -= 2; + if (cutNode && depth >= 8 && (!ttMove || tte->bound() == BOUND_UPPER)) + depth -= 1 + !ttMove; // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. From 2dbb44e28d2e5b3c72ddbbd6f436d41f75031a22 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Wed, 8 May 2024 03:26:09 +0900 Subject: [PATCH 005/315] Fix nodestime 1. The current time management system utilizes limits.inc and limits.time, which can represent either milliseconds or node count, depending on whether the nodestime option is active. There have been several modifications which brought Elo gain for typical uses (i.e. real-time matches), however some of these changes overlooked this distinction. This patch adjusts constants and multiplication/division to more accurately simulate real TC conditions when nodestime is used. 2. The advance_nodes_time function has a bug that can extend the time limit when availableNodes reaches exactly zero. This patch fixes the bug by initializing the variable to -1 and making sure it does not go below zero. 3.
elapsed_time function is newly introduced to print PV in the UCI output based on real time. This makes PV output more consistent with the behavior of trivial use cases. closes https://github.com/official-stockfish/Stockfish/pull/5186 No functional changes --- src/search.cpp | 22 ++++++++++++++++------ src/search.h | 1 + src/timeman.cpp | 46 +++++++++++++++++++++++++++------------------- src/timeman.h | 5 +++-- 4 files changed, 47 insertions(+), 27 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 767ea238..684b760e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -190,8 +190,8 @@ void Search::Worker::start_searching() { // When playing in 'nodes as time' mode, subtract the searched nodes from // the available ones before exiting. if (limits.npmsec) - main_manager()->tm.advance_nodes_time(limits.inc[rootPos.side_to_move()] - - threads.nodes_searched()); + main_manager()->tm.advance_nodes_time(threads.nodes_searched() + - limits.inc[rootPos.side_to_move()]); Worker* bestThread = this; Skill skill = @@ -347,7 +347,7 @@ void Search::Worker::iterative_deepening() { // When failing high/low give some update (without cluttering // the UI) before a re-search. if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta) - && elapsed() > 3000) + && elapsed_time() > 3000) main_manager()->pv(*this, threads, tt, rootDepth); // In case of failing low/high increase aspiration window and @@ -378,7 +378,7 @@ void Search::Worker::iterative_deepening() { std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); if (mainThread - && (threads.stop || pvIdx + 1 == multiPV || elapsed() > 3000) + && (threads.stop || pvIdx + 1 == multiPV || elapsed_time() > 3000) // A thread that aborted search can have mated-in/TB-loss PV and score // that cannot be trusted, i.e. it can be delayed or refuted if we would have // had time to fully search other root-moves. 
Thus we suppress this output and @@ -935,7 +935,7 @@ moves_loop: // When in check, search starts here ss->moveCount = ++moveCount; - if (rootNode && is_mainthread() && elapsed() > 3000) + if (rootNode && is_mainthread() && elapsed_time() > 3000) { main_manager()->updates.onIter( {depth, UCIEngine::move(move, pos.is_chess960()), moveCount + thisThread->pvIdx}); @@ -1647,10 +1647,20 @@ Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) { return (reductionScale + 1318 - delta * 760 / rootDelta) / 1024 + (!i && reductionScale > 1066); } +// elapsed() returns the time elapsed since the search started. If the +// 'nodestime' option is enabled, it will return the count of nodes searched +// instead. This function is called to check whether the search should be +// stopped based on predefined thresholds like time limits or nodes searched. +// +// elapsed_time() returns the actual time elapsed since the start of the search. +// This function is intended for use only when printing PV outputs, and not used +// for making decisions within the search algorithm itself. TimePoint Search::Worker::elapsed() const { return main_manager()->tm.elapsed([this]() { return threads.nodes_searched(); }); } +TimePoint Search::Worker::elapsed_time() const { return main_manager()->tm.elapsed_time(); } + namespace { // Adjusts a mate or TB score from "plies to mate from the root" @@ -1900,7 +1910,7 @@ void SearchManager::pv(const Search::Worker& worker, const auto& rootMoves = worker.rootMoves; const auto& pos = worker.rootPos; size_t pvIdx = worker.pvIdx; - TimePoint time = tm.elapsed([nodes]() { return nodes; }) + 1; + TimePoint time = tm.elapsed_time() + 1; size_t multiPV = std::min(size_t(worker.options["MultiPV"]), rootMoves.size()); uint64_t tbHits = threads.tb_hits() + (worker.tbConfig.rootInTB ? 
rootMoves.size() : 0); diff --git a/src/search.h b/src/search.h index cb73a5af..c824daf9 100644 --- a/src/search.h +++ b/src/search.h @@ -276,6 +276,7 @@ class Worker { } TimePoint elapsed() const; + TimePoint elapsed_time() const; LimitsType limits; diff --git a/src/timeman.cpp b/src/timeman.cpp index c651745f..4feb329b 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -32,12 +32,12 @@ TimePoint TimeManagement::optimum() const { return optimumTime; } TimePoint TimeManagement::maximum() const { return maximumTime; } void TimeManagement::clear() { - availableNodes = 0; // When in 'nodes as time' mode + availableNodes = -1; // When in 'nodes as time' mode } void TimeManagement::advance_nodes_time(std::int64_t nodes) { assert(useNodesTime); - availableNodes += nodes; + availableNodes = std::max(int64_t(0), availableNodes - nodes); } // Called at the beginning of the search and calculates @@ -48,14 +48,17 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply, const OptionsMap& options) { - // If we have no time, no need to initialize TM, except for the start time, - // which is used by movetime. - startTime = limits.startTime; + TimePoint npmsec = TimePoint(options["nodestime"]); + + // If we have no time, we don't need to fully initialize TM. + // startTime is used by movetime and useNodesTime is used in elapsed calls. + startTime = limits.startTime; + useNodesTime = npmsec != 0; + if (limits.time[us] == 0) return; TimePoint moveOverhead = TimePoint(options["Move Overhead"]); - TimePoint npmsec = TimePoint(options["nodestime"]); // optScale is a percentage of available time to use for the current move. // maxScale is a multiplier applied to optimumTime. @@ -65,26 +68,31 @@ void TimeManagement::init(Search::LimitsType& limits, // to nodes, and use resulting values in time management formulas. // WARNING: to avoid time losses, the given npmsec (nodes per millisecond) // must be much lower than the real engine speed. 
- if (npmsec) + if (useNodesTime) { - useNodesTime = true; - - if (!availableNodes) // Only once at game start + if (availableNodes == -1) // Only once at game start availableNodes = npmsec * limits.time[us]; // Time is in msec // Convert from milliseconds to nodes limits.time[us] = TimePoint(availableNodes); limits.inc[us] *= npmsec; limits.npmsec = npmsec; + moveOverhead *= npmsec; } + // These numbers are used where multiplications, divisions or comparisons + // with constants are involved. + const int64_t scaleFactor = useNodesTime ? npmsec : 1; + const TimePoint scaledTime = limits.time[us] / scaleFactor; + const TimePoint scaledInc = limits.inc[us] / scaleFactor; + // Maximum move horizon of 50 moves int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50; - // if less than one second, gradually reduce mtg - if (limits.time[us] < 1000 && (double(mtg) / limits.time[us] > 0.05)) + // If less than one second, gradually reduce mtg + if (scaledTime < 1000 && double(mtg) / scaledInc > 0.05) { - mtg = limits.time[us] * 0.05; + mtg = scaledTime * 0.05; } // Make sure timeLeft is > 0 since we may use it as a divisor @@ -97,15 +105,15 @@ void TimeManagement::init(Search::LimitsType& limits, if (limits.movestogo == 0) { // Use extra time with larger increments - double optExtra = limits.inc[us] < 500 ? 1.0 : 1.13; + double optExtra = scaledInc < 500 ? 1.0 : 1.13; // Calculate time constants based on current time left. 
- double optConstant = - std::min(0.00308 + 0.000319 * std::log10(limits.time[us] / 1000.0), 0.00506); - double maxConstant = std::max(3.39 + 3.01 * std::log10(limits.time[us] / 1000.0), 2.93); + double logTimeInSec = std::log10(scaledTime / 1000.0); + double optConstant = std::min(0.00308 + 0.000319 * logTimeInSec, 0.00506); + double maxConstant = std::max(3.39 + 3.01 * logTimeInSec, 2.93); optScale = std::min(0.0122 + std::pow(ply + 2.95, 0.462) * optConstant, - 0.213 * limits.time[us] / double(timeLeft)) + 0.213 * limits.time[us] / timeLeft) * optExtra; maxScale = std::min(6.64, maxConstant + ply / 12.0); } @@ -113,7 +121,7 @@ void TimeManagement::init(Search::LimitsType& limits, // x moves in y seconds (+ z increment) else { - optScale = std::min((0.88 + ply / 116.4) / mtg, 0.88 * limits.time[us] / double(timeLeft)); + optScale = std::min((0.88 + ply / 116.4) / mtg, 0.88 * limits.time[us] / timeLeft); maxScale = std::min(6.3, 1.5 + 0.11 * mtg); } diff --git a/src/timeman.h b/src/timeman.h index 35c3cfc0..1b6bd849 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -42,8 +42,9 @@ class TimeManagement { TimePoint maximum() const; template TimePoint elapsed(FUNC nodes) const { - return useNodesTime ? TimePoint(nodes()) : now() - startTime; + return useNodesTime ? 
TimePoint(nodes()) : elapsed_time(); } + TimePoint elapsed_time() const { return now() - startTime; }; void clear(); void advance_nodes_time(std::int64_t nodes); @@ -53,7 +54,7 @@ class TimeManagement { TimePoint optimumTime; TimePoint maximumTime; - std::int64_t availableNodes = 0; // When in 'nodes as time' mode + std::int64_t availableNodes = -1; // When in 'nodes as time' mode bool useNodesTime = false; // True if we are in 'nodes as time' mode }; From 9d6dab06a8274c4e09b437110f86bdb1ea7edb0f Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sun, 5 May 2024 03:10:26 +0300 Subject: [PATCH 006/315] simplify moveCountPruning no (significant) speedup upon renewed testing Passed stc: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 88992 W: 22779 L: 22633 D: 43580 Ptnml(0-2): 137, 8706, 26681, 8818, 154 https://tests.stockfishchess.org/tests/view/6636c4844b68b70d85800dae closes https://github.com/official-stockfish/Stockfish/pull/5213 No functional change. --- src/search.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 684b760e..1d0cb4ab 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -960,8 +960,7 @@ moves_loop: // When in check, search starts here if (!rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) - if (!moveCountPruning) - moveCountPruning = moveCount >= futility_move_count(improving, depth); + moveCountPruning = moveCount >= futility_move_count(improving, depth); // Reduced depth of the next LMR search int lmrDepth = newDepth - r; From 3b4ddf4ae6362ddef063cc644d1466754015482e Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Mon, 6 May 2024 20:18:12 +0300 Subject: [PATCH 007/315] Simplify away conthist 3 from statscore Following previous elo gainer that gained by making conthist 3 less important in pruning this patch simplifies away this history from calculation of statscore. 
Passed STC: https://tests.stockfishchess.org/tests/view/6637aa7e9819650825aa93e0 LLR: 3.00 (-2.94,2.94) <-1.75,0.25> Total: 35392 W: 9352 L: 9120 D: 16920 Ptnml(0-2): 141, 4145, 8888, 4385, 137 Passed LTC: https://tests.stockfishchess.org/tests/view/66383cd8493aaaf4b7ea90c5 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 33948 W: 8714 L: 8503 D: 16731 Ptnml(0-2): 39, 3701, 9270, 3938, 26 closes https://github.com/official-stockfish/Stockfish/pull/5220 Bench: 2508571 --- src/search.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 1d0cb4ab..d9f997e8 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1149,8 +1149,7 @@ moves_loop: // When in check, search starts here ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - + (*contHist[3])[movedPiece][move.to_sq()] - 5078; + + (*contHist[1])[movedPiece][move.to_sq()] - 5078; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) r -= ss->statScore / (17662 - std::min(depth, 16) * 105); @@ -1569,9 +1568,12 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, break; // Continuation history based pruning (~3 Elo) - if (!capture && (*contHist[0])[pos.moved_piece(move)][move.to_sq()] - + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] - + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)][move.to_sq()] <= 4000) + if (!capture + && (*contHist[0])[pos.moved_piece(move)][move.to_sq()] + + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] + + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)] + [move.to_sq()] + <= 4000) continue; // Do not search moves with bad enough SEE values (~5 Elo) From 23439e4096bc28deb2e4e935f24c5ddb22999dc5 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Tue, 7 May 2024 09:27:04 +0300 Subject: [PATCH 008/315] Remove conthist 3 from 
moves loop pruning Followup to previous gainer that made it twice less impactful there - this patch removes it entirely as a simplification. Passed STC: https://tests.stockfishchess.org/tests/view/6637aa7e9819650825aa93e0 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 26208 W: 6930 L: 6694 D: 12584 Ptnml(0-2): 113, 2997, 6652, 3225, 117 Passed LTC: https://tests.stockfishchess.org/tests/view/66383cba493aaaf4b7ea90c2 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 67866 W: 17294 L: 17118 D: 33454 Ptnml(0-2): 46, 7627, 18415, 7795, 50 closes https://github.com/official-stockfish/Stockfish/pull/5221 Bench: 2691699 --- src/search.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index d9f997e8..448da7e2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -990,7 +990,6 @@ moves_loop: // When in check, search starts here int history = (*contHist[0])[movedPiece][move.to_sq()] + (*contHist[1])[movedPiece][move.to_sq()] - + (*contHist[3])[movedPiece][move.to_sq()] / 2 + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) From 574ad14b323465314c8d5d5a81af995cb58b07c9 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Thu, 9 May 2024 02:56:43 +0300 Subject: [PATCH 009/315] Simplify depth formula based on score improvement Simplify depth formula based on score improvement. 
This idea was first tried by cj5716 Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 347104 W: 89683 L: 89804 D: 167617 Ptnml(0-2): 1357, 38824, 93307, 38711, 1353 https://tests.stockfishchess.org/tests/view/66378edf9819650825aa75d0 Passed LTC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 63000 W: 15851 L: 15694 D: 31455 Ptnml(0-2): 22, 5396, 20499, 5569, 14 https://tests.stockfishchess.org/tests/view/663c04e5c0b75d7f7b97d461 closes https://github.com/official-stockfish/Stockfish/pull/5225 Bench: 2691699 Co-Authored-By: cj5716 <125858804+cj5716@users.noreply.github.com> --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 448da7e2..fdf9871c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1288,7 +1288,7 @@ moves_loop: // When in check, search starts here else { // Reduce other moves if we have found at least one score improvement (~2 Elo) - if (depth > 2 && depth < 13 && beta < 15868 && value > -14630) + if (depth > 2 && depth < 13 && abs(value) < VALUE_TB_WIN_IN_MAX_PLY) depth -= 2; assert(depth > 0); From c43425b0b1167665b2f9520690e639c80977c067 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Wed, 8 May 2024 14:26:01 -0700 Subject: [PATCH 010/315] Simplify Away Negative Extension This patch simplifies away the negative extension applied when the value returned by the transposition table is assumed to fail low over the value of reduced search. 
Passed STC: LLR: 2.99 (-2.94,2.94) <-1.75,0.25> Total: 248736 W: 64293 L: 64302 D: 120141 Ptnml(0-2): 925, 29833, 62831, 29884, 895 https://tests.stockfishchess.org/tests/view/663bee3bca93dad645f7f64a Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 254970 W: 64289 L: 64308 D: 126373 Ptnml(0-2): 110, 28428, 70422, 28421, 104 https://tests.stockfishchess.org/tests/view/663c11f0c0b75d7f7b97d4bb closes https://github.com/official-stockfish/Stockfish/pull/5226 Bench: 2353057 --- src/search.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index fdf9871c..4572ffc9 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1087,10 +1087,6 @@ moves_loop: // When in check, search starts here // If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo) else if (cutNode) extension = -2; - - // If the ttMove is assumed to fail low over the value of the reduced search (~1 Elo) - else if (ttValue <= value) - extension = -1; } // Extension for capturing the previous moved piece (~0 Elo on STC, ~1 Elo on LTC) From b8812138e8e4e6ebd9d1c46ca9da15ddab1eb1ae Mon Sep 17 00:00:00 2001 From: xu-shawn <50402888+xu-shawn@users.noreply.github.com> Date: Thu, 9 May 2024 00:11:09 -0700 Subject: [PATCH 011/315] Fix usage of abs vs std::abs closes https://github.com/official-stockfish/Stockfish/pull/5229 no functional change --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 4572ffc9..6c30c3e9 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1284,7 +1284,7 @@ moves_loop: // When in check, search starts here else { // Reduce other moves if we have found at least one score improvement (~2 Elo) - if (depth > 2 && depth < 13 && abs(value) < VALUE_TB_WIN_IN_MAX_PLY) + if (depth > 2 && depth < 13 && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) depth -= 2; assert(depth > 0); From 540545d12792dc554e3a4cd1b09633c31a16d31b Mon Sep 17 00:00:00 2001 From: 
Shawn Xu Date: Thu, 9 May 2024 00:38:43 -0700 Subject: [PATCH 012/315] simplify away quietCheckEvasions pruning simplifies away the pruning of quiet evasion moves in quiescent search. Passed STC: LLR: 2.98 (-2.94,2.94) <-1.75,0.25> Total: 343520 W: 88356 L: 88470 D: 166694 Ptnml(0-2): 1061, 40073, 89706, 39759, 1161 https://tests.stockfishchess.org/tests/view/663c7ddfc0b75d7f7b980f3b Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 168744 W: 42454 L: 42384 D: 83906 Ptnml(0-2): 75, 18678, 46782, 18776, 61 https://tests.stockfishchess.org/tests/view/663ce34fc0b75d7f7b981ed9 closes https://github.com/official-stockfish/Stockfish/pull/5231 bench 3681552 --- src/search.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 6c30c3e9..3867a397 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1502,8 +1502,6 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, &thisThread->pawnHistory); - int quietCheckEvasions = 0; - // Step 5. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. while ((move = mp.next_move()) != Move::none()) @@ -1556,12 +1554,6 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, } } - // We prune after the second quiet check evasion move, where being 'in check' is - // implicitly checked through the counter, and being a 'quiet move' apart from - // being a tt move is assumed after an increment because captures are pushed ahead. 
- if (quietCheckEvasions > 1) - break; - // Continuation history based pruning (~3 Elo) if (!capture && (*contHist[0])[pos.moved_piece(move)][move.to_sq()] @@ -1585,8 +1577,6 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, &thisThread ->continuationHistory[ss->inCheck][capture][pos.moved_piece(move)][move.to_sq()]; - quietCheckEvasions += !capture && ss->inCheck; - // Step 7. Make and search the move thisThread->nodes.fetch_add(1, std::memory_order_relaxed); pos.do_move(move, st, givesCheck); From 813c5aa5329011e218dad8dc53d61504cecadc3f Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Sun, 12 May 2024 17:49:30 +0800 Subject: [PATCH 013/315] VVLTC search tune Tuned at 111k games of VVLTC. Passed VVLTC 1st sprt: https://tests.stockfishchess.org/tests/view/664090c6d163897c63214324 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 83046 W: 21071 L: 20747 D: 41228 Ptnml(0-2): 2, 7574, 26048, 7896, 3 Passed VVLTC 2nd sprt: https://tests.stockfishchess.org/tests/view/6640cb2abaa6260a5688dc17 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 68630 W: 17620 L: 17270 D: 33740 Ptnml(0-2): 4, 6242, 21471, 6596, 2 closes https://github.com/official-stockfish/Stockfish/pull/5240 Bench: 1752471 --- src/search.cpp | 84 +++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 3867a397..1d9e0d81 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -59,9 +59,9 @@ static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { - Value futilityMult = 126 - 46 * noTtCutNode; - Value improvingDeduction = 58 * improving * futilityMult / 32; - Value worseningDeduction = (323 + 52 * improving) * oppWorsening * futilityMult / 1024; + Value futilityMult = 131 - 48 * noTtCutNode; + Value improvingDeduction = 57 * 
improving * futilityMult / 32; + Value worseningDeduction = (309 + 52 * improving) * oppWorsening * futilityMult / 1024; return futilityMult * d - improvingDeduction - worseningDeduction; } @@ -73,15 +73,15 @@ constexpr int futility_move_count(bool improving, Depth depth) { // Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; - v += cv * std::abs(cv) / 7350; + v += cv * std::abs(cv) / 7179; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::clamp(208 * d - 297, 16, 1406); } +int stat_bonus(Depth d) { return std::clamp(200 * d - 280, 16, 1495); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return (d < 4 ? 520 * d - 312 : 1479); } +int stat_malus(Depth d) { return (d < 4 ? 
586 * d - 284 : 1639); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } @@ -310,12 +310,12 @@ void Search::Worker::iterative_deepening() { // Reset aspiration window starting size Value avg = rootMoves[pvIdx].averageScore; - delta = 10 + avg * avg / 9530; + delta = 10 + avg * avg / 9474; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore (~4 Elo) - optimism[us] = 119 * avg / (std::abs(avg) + 88); + optimism[us] = 117 * avg / (std::abs(avg) + 88); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail @@ -502,10 +502,10 @@ void Search::Worker::clear() { for (StatsType c : {NoCaptures, Captures}) for (auto& to : continuationHistory[inCheck][c]) for (auto& h : to) - h->fill(-60); + h->fill(-62); for (size_t i = 1; i < reductions.size(); ++i) - reductions[i] = int((18.93 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + reductions[i] = int((21.19 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); refreshTable.clear(networks); } @@ -738,7 +738,7 @@ Value Search::Worker::search( // Use static evaluation difference to improve quiet move ordering (~9 Elo) if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-13 * int((ss - 1)->staticEval + ss->staticEval), -1796, 1526); + int bonus = std::clamp(-12 * int((ss - 1)->staticEval + ss->staticEval), -1749, 1602); bonus = bonus > 0 ? 2 * bonus : bonus / 2; thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) @@ -761,7 +761,7 @@ Value Search::Worker::search( // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. // Adjust razor margin according to cutoffCnt. 
(~1 Elo) - if (eval < alpha - 433 - (302 - 141 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) + if (eval < alpha - 473 - (308 - 138 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) @@ -772,21 +772,21 @@ Value Search::Worker::search( // The depth condition is important for mate finding. if (!ss->ttPv && depth < 11 && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - - (ss - 1)->statScore / 254 + - (ss - 1)->statScore / 258 >= beta && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) return beta > VALUE_TB_LOSS_IN_MAX_PLY ? (eval + beta) / 2 : eval; // Step 9. Null move search with verification search (~35 Elo) - if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 16993 - && eval >= beta && ss->staticEval >= beta - 19 * depth + 326 && !excludedMove + if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 16079 + && eval >= beta && ss->staticEval >= beta - 21 * depth + 324 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / 134, 6) + depth / 3 + 4; + Depth R = std::min(int(eval - beta) / 144, 6) + depth / 3 + 4; ss->currentMove = Move::null(); ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -834,7 +834,7 @@ Value Search::Worker::search( // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. 
- probCutBeta = beta + 159 - 66 * improving; + probCutBeta = beta + 177 - 65 * improving; if ( !PvNode && depth > 3 && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY @@ -890,7 +890,7 @@ Value Search::Worker::search( moves_loop: // When in check, search starts here // Step 12. A small Probcut idea, when we are in check (~4 Elo) - probCutBeta = beta + 420; + probCutBeta = beta + 428; if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 4 && ttValue >= probCutBeta && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) @@ -974,15 +974,15 @@ moves_loop: // When in check, search starts here // Futility pruning for captures (~2 Elo) if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { - Value futilityValue = ss->staticEval + 295 + 280 * lmrDepth + Value futilityValue = ss->staticEval + 305 + 272 * lmrDepth + PieceValue[capturedPiece] + captHist / 7; if (futilityValue <= alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - int seeHist = std::clamp(captHist / 32, -197 * depth, 196 * depth); - if (!pos.see_ge(move, -186 * depth - seeHist)) + int seeHist = std::clamp(captHist / 32, -185 * depth, 182 * depth); + if (!pos.see_ge(move, -176 * depth - seeHist)) continue; } else @@ -993,18 +993,18 @@ moves_loop: // When in check, search starts here + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) - if (lmrDepth < 6 && history < -4081 * depth) + if (lmrDepth < 6 && history < -4360 * depth) continue; history += 2 * thisThread->mainHistory[us][move.from_to()]; - lmrDepth += history / 4768; + lmrDepth += history / 4507; Value futilityValue = - ss->staticEval + (bestValue < ss->staticEval - 52 ? 134 : 54) + 142 * lmrDepth; + ss->staticEval + (bestValue < ss->staticEval - 54 ? 
142 : 55) + 132 * lmrDepth; // Futility pruning: parent node (~13 Elo) - if (!ss->inCheck && lmrDepth < 13 && futilityValue <= alpha) + if (!ss->inCheck && lmrDepth < 11 && futilityValue <= alpha) { if (bestValue <= futilityValue && std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && futilityValue < VALUE_TB_WIN_IN_MAX_PLY) @@ -1015,7 +1015,7 @@ moves_loop: // When in check, search starts here lmrDepth = std::max(lmrDepth, 0); // Prune moves with negative SEE (~4 Elo) - if (!pos.see_ge(move, -28 * lmrDepth * lmrDepth)) + if (!pos.see_ge(move, -27 * lmrDepth * lmrDepth)) continue; } } @@ -1035,11 +1035,11 @@ moves_loop: // When in check, search starts here // so changing them requires tests at these types of time controls. // Recursive singular search is avoided. if (!rootNode && move == ttMove && !excludedMove - && depth >= 4 - (thisThread->completedDepth > 32) + ss->ttPv + && depth >= 4 - (thisThread->completedDepth > 33) + ss->ttPv && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - (65 + 52 * (ss->ttPv && !PvNode)) * depth / 63; + Value singularBeta = ttValue - (59 + 49 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1049,10 +1049,10 @@ moves_loop: // When in check, search starts here if (value < singularBeta) { - int doubleMargin = 251 * PvNode - 241 * !ttCapture; + int doubleMargin = 285 * PvNode - 228 * !ttCapture; int tripleMargin = - 135 + 234 * PvNode - 248 * !ttCapture + 124 * (ss->ttPv || !ttCapture); - int quadMargin = 447 + 354 * PvNode - 300 * !ttCapture + 206 * ss->ttPv; + 121 + 238 * PvNode - 259 * !ttCapture + 117 * (ss->ttPv || !ttCapture); + int quadMargin = 471 + 343 * PvNode - 281 * !ttCapture + 217 * ss->ttPv; extension = 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin) @@ -1093,7 +1093,7 @@ moves_loop: // When in check, search starts here else if (PvNode && move == 
ttMove && move.to_sq() == prevSq && thisThread->captureHistory[movedPiece][move.to_sq()] [type_of(pos.piece_on(move.to_sq()))] - > 4016) + > 4041) extension = 1; } @@ -1144,10 +1144,10 @@ moves_loop: // When in check, search starts here ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - 5078; + + (*contHist[1])[movedPiece][move.to_sq()] - 5313; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / (17662 - std::min(depth, 16) * 105); + r -= ss->statScore / (16145 - std::min(depth, 15) * 102); // Step 17. Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1 + rootNode) @@ -1166,7 +1166,7 @@ moves_loop: // When in check, search starts here { // Adjust full-depth search based on LMR results - if the result // was good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = value > (bestValue + 40 + 2 * newDepth); // (~1 Elo) + const bool doDeeperSearch = value > (bestValue + 41 + 2 * newDepth); // (~1 Elo) const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo) newDepth += doDeeperSearch - doShallowerSearch; @@ -1327,9 +1327,9 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -14455) - + ((ss - 1)->moveCount > 10) + (!ss->inCheck && bestValue <= ss->staticEval - 130) - + (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 77); + int bonus = (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -14323) + + ((ss - 1)->moveCount > 10) + (!ss->inCheck && bestValue <= ss->staticEval - 127) + + (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 76); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus); 
thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] @@ -1488,7 +1488,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, if (bestValue > alpha) alpha = bestValue; - futilityBase = ss->staticEval + 270; + futilityBase = ss->staticEval + 259; } const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -1560,11 +1560,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)] [move.to_sq()] - <= 4000) + <= 4057) continue; // Do not search moves with bad enough SEE values (~5 Elo) - if (!pos.see_ge(move, -69)) + if (!pos.see_ge(move, -68)) continue; } @@ -1630,7 +1630,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) { int reductionScale = reductions[d] * reductions[mn]; - return (reductionScale + 1318 - delta * 760 / rootDelta) / 1024 + (!i && reductionScale > 1066); + return (reductionScale + 1284 - delta * 755 / rootDelta) / 1024 + (!i && reductionScale > 1133); } // elapsed() returns the time elapsed since the search started. If the From d3f081ed8ad749cc7e07c0d85b4e8818678f952f Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Thu, 9 May 2024 21:10:24 +0300 Subject: [PATCH 014/315] Adjust standpat return value in qsearch Instead of returning value itself return value between it and beta for non pv nodes - analogous to what we do after actual search there. 
Passed STC: https://tests.stockfishchess.org/tests/view/663cb1b4c0b75d7f7b98188e LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 131552 W: 34131 L: 33673 D: 63748 Ptnml(0-2): 420, 15446, 33600, 15876, 434 Passed LTC: https://tests.stockfishchess.org/tests/view/663cda5dc0b75d7f7b981c6f LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 282798 W: 71658 L: 70833 D: 140307 Ptnml(0-2): 112, 31187, 77979, 32006, 115 closes https://github.com/official-stockfish/Stockfish/pull/5233 Bench: 1606672 --- src/search.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index 1d9e0d81..ae2b1de2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1478,6 +1478,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) { + if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && !PvNode) + bestValue = (3 * bestValue + beta) / 4; if (!ss->ttHit) tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_NONE, Move::none(), unadjustedStaticEval, tt.generation()); From 53f363041cd96be840244f989823781ecd21b658 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Thu, 9 May 2024 13:47:00 -0400 Subject: [PATCH 015/315] Simplify npm constants when adjusting eval Passed non-regression STC: https://tests.stockfishchess.org/tests/view/663d0c4f507ebe1c0e91ec8d LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 162784 W: 41987 L: 41906 D: 78891 Ptnml(0-2): 520, 19338, 41591, 19427, 516 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/663d20fd507ebe1c0e91f405 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 457242 W: 115022 L: 115250 D: 226970 Ptnml(0-2): 271, 51566, 125179, 51330, 275 closes https://github.com/official-stockfish/Stockfish/pull/5237 Bench: 2238216 --- src/evaluate.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 5be7e7a1..cfe20601 100644 --- 
a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -62,15 +62,13 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, Value nnue = smallNet ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity) : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); - const auto adjustEval = [&](int nnueDiv, int pawnCountConstant, int pawnCountMul, - int npmConstant, int evalDiv, int shufflingConstant) { + const auto adjustEval = [&](int nnueDiv, int pawnCountMul, int evalDiv, int shufflingConstant) { // Blend optimism and eval with nnue complexity and material imbalance optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; nnue -= nnue * (nnueComplexity * 5 / 3) / nnueDiv; int npm = pos.non_pawn_material() / 64; - v = (nnue * (npm + pawnCountConstant + pawnCountMul * pos.count()) - + optimism * (npmConstant + npm)) + v = (nnue * (npm + 943 + pawnCountMul * pos.count()) + optimism * (npm + 140)) / evalDiv; // Damp down the evaluation linearly when shuffling @@ -79,9 +77,9 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, }; if (!smallNet) - adjustEval(32395, 942, 11, 139, 1058, 178); + adjustEval(32395, 11, 1058, 178); else - adjustEval(32793, 944, 9, 140, 1067, 206); + adjustEval(32793, 9, 1067, 206); // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); From 0b08953174d222270100690b45fad0dc47c01f98 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Thu, 9 May 2024 14:03:35 -0400 Subject: [PATCH 016/315] Re-evaluate some small net positions for more accurate evals Use main net evals when small net evals hint that higher eval accuracy may be worth the slower eval speeds. With Finny caches, re-evals with the main net are less expensive than before. Original idea by mstembera who I've added as co-author to this PR. 
Based on reEval tests by mstembera: https://tests.stockfishchess.org/tests/view/65e69187b6345c1b934866e5 https://tests.stockfishchess.org/tests/view/65e863aa0ec64f0526c3e991 A few variants of this patch also passed LTC: https://tests.stockfishchess.org/tests/view/663d2108507ebe1c0e91f407 https://tests.stockfishchess.org/tests/view/663e388c3a2f9702074bc152 Passed STC: https://tests.stockfishchess.org/tests/view/663dadbd1a61d6377f190e2c LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 92320 W: 23941 L: 23531 D: 44848 Ptnml(0-2): 430, 10993, 22931, 11349, 457 Passed LTC: https://tests.stockfishchess.org/tests/view/663ef48b2948bf9aa698690c LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 98934 W: 24907 L: 24457 D: 49570 Ptnml(0-2): 48, 10952, 27027, 11382, 58 closes https://github.com/official-stockfish/Stockfish/pull/5238 bench 1876282 Co-Authored-By: mstembera <5421953+mstembera@users.noreply.github.com> --- src/evaluate.cpp | 3 +++ src/evaluate.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index cfe20601..b5f28d5a 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -62,6 +62,9 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, Value nnue = smallNet ? 
networks.small.evaluate(pos, &caches.small, true, &nnueComplexity) : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); + if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 500)) + nnue = networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); + const auto adjustEval = [&](int nnueDiv, int pawnCountMul, int evalDiv, int shufflingConstant) { // Blend optimism and eval with nnue complexity and material imbalance optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; diff --git a/src/evaluate.h b/src/evaluate.h index 2d244ff6..afaf35eb 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -29,7 +29,7 @@ class Position; namespace Eval { -constexpr inline int SmallNetThreshold = 1274; +constexpr inline int SmallNetThreshold = 1174; // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the From e608eab8dd9f7bd68f192d56d742f621674b8fa8 Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 12 May 2024 04:45:01 -0700 Subject: [PATCH 017/315] Optimize update_accumulator_refresh_cache() STC https://tests.stockfishchess.org/tests/view/664105df26ac5f9b286d30e6 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 178528 W: 46235 L: 45750 D: 86543 Ptnml(0-2): 505, 17792, 52142, 18363, 462 Combo of two yellow speedups https://tests.stockfishchess.org/tests/view/6640abf9d163897c63214f5c LLR: -2.93 (-2.94,2.94) <0.00,2.00> Total: 355744 W: 91714 L: 91470 D: 172560 Ptnml(0-2): 913, 36233, 103384, 36381, 961 https://tests.stockfishchess.org/tests/view/6628ce073fe04ce4cefc739c LLR: -2.93 (-2.94,2.94) <0.00,2.00> Total: 627040 W: 162001 L: 161339 D: 303700 Ptnml(0-2): 2268, 72379, 163532, 73105, 2236 closes https://github.com/official-stockfish/Stockfish/pull/5239 No functional change --- src/nnue/nnue_feature_transformer.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/nnue/nnue_feature_transformer.h 
b/src/nnue/nnue_feature_transformer.h index 2b11adef..bcd14e6f 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -664,7 +664,11 @@ class FeatureTransformer { for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { - auto entryTile = reinterpret_cast(&entry.accumulation[j * TileHeight]); + auto accTile = + reinterpret_cast(&accumulator.accumulation[Perspective][j * TileHeight]); + auto entryTile = + reinterpret_cast(&entry.accumulation[j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) acc[k] = entryTile[k]; @@ -679,7 +683,7 @@ class FeatureTransformer { auto columnA = reinterpret_cast(&weights[offsetA]); for (unsigned k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]), columnA[k]); + acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k])); } for (; i < int(removed.size()); ++i) { @@ -702,12 +706,17 @@ class FeatureTransformer { for (IndexType k = 0; k < NumRegs; k++) vec_store(&entryTile[k], acc[k]); + for (IndexType k = 0; k < NumRegs; k++) + vec_store(&accTile[k], acc[k]); } for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) { - auto entryTilePsqt = - reinterpret_cast(&entry.psqtAccumulation[j * PsqtTileHeight]); + auto accTilePsqt = reinterpret_cast( + &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + auto entryTilePsqt = reinterpret_cast( + &entry.psqtAccumulation[j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = entryTilePsqt[k]; @@ -732,6 +741,8 @@ class FeatureTransformer { for (std::size_t k = 0; k < NumPsqtRegs; ++k) vec_store_psqt(&entryTilePsqt[k], psqt[k]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); } #else @@ -755,8 +766,6 @@ class FeatureTransformer { entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k]; } -#endif - // The accumulator of the refresh entry has been updated. 
// Now copy its content to the actual accumulator we were refreshing @@ -765,6 +774,7 @@ class FeatureTransformer { std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation, sizeof(int32_t) * PSQTBuckets); +#endif for (Color c : {WHITE, BLACK}) entry.byColorBB[c] = pos.pieces(c); From 2682c2127d1360524915f6cd68cbeabfdd19ce26 Mon Sep 17 00:00:00 2001 From: xoto10 <23479932+xoto10@users.noreply.github.com> Date: Mon, 13 May 2024 07:19:18 +0100 Subject: [PATCH 018/315] Use 5% less time on first move Stockfish appears to take too much time on the first move of a game and then not enough on moves 2,3,4... Probably caused by most of the factors that increase time usually applying on the first move. Attempts to give more time to the subsequent moves have not worked so far, but this change to simply reduce first move time by 5% worked. STC 10+0.1 : LLR: 2.96 (-2.94,2.94) <0.00,2.00> Total: 78496 W: 20516 L: 20135 D: 37845 Ptnml(0-2): 340, 8859, 20456, 9266, 327 https://tests.stockfishchess.org/tests/view/663d47bf507ebe1c0e9200ba LTC 60+0.6 : LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 94872 W: 24179 L: 23751 D: 46942 Ptnml(0-2): 61, 9743, 27405, 10161, 66 https://tests.stockfishchess.org/tests/view/663e779cbb28828150dd9089 closes https://github.com/official-stockfish/Stockfish/pull/5235 Bench: 1876282 --- src/nnue/nnue_feature_transformer.h | 9 ++++----- src/search.cpp | 3 ++- src/search.h | 1 + src/thread.cpp | 1 + src/timeman.cpp | 11 +++++++---- src/timeman.h | 3 ++- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index bcd14e6f..7b7aada3 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -666,8 +666,7 @@ class FeatureTransformer { { auto accTile = reinterpret_cast(&accumulator.accumulation[Perspective][j * TileHeight]); - auto entryTile = - reinterpret_cast(&entry.accumulation[j * TileHeight]); + auto entryTile = 
reinterpret_cast(&entry.accumulation[j * TileHeight]); for (IndexType k = 0; k < NumRegs; ++k) acc[k] = entryTile[k]; @@ -714,9 +713,9 @@ class FeatureTransformer { { auto accTilePsqt = reinterpret_cast( &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); - auto entryTilePsqt = reinterpret_cast( - &entry.psqtAccumulation[j * PsqtTileHeight]); - + auto entryTilePsqt = + reinterpret_cast(&entry.psqtAccumulation[j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = entryTilePsqt[k]; diff --git a/src/search.cpp b/src/search.cpp index ae2b1de2..edbb58c6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -157,7 +157,8 @@ void Search::Worker::start_searching() { return; } - main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options); + main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options, + main_manager()->originalPly); tt.new_search(); if (rootMoves.empty()) diff --git a/src/search.h b/src/search.h index c824daf9..6e5b22bd 100644 --- a/src/search.h +++ b/src/search.h @@ -210,6 +210,7 @@ class SearchManager: public ISearchManager { Depth depth) const; Stockfish::TimeManagement tm; + int originalPly; int callsCnt; std::atomic_bool ponder; diff --git a/src/thread.cpp b/src/thread.cpp index 9052654b..8724cb49 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -167,6 +167,7 @@ void ThreadPool::clear() { main_manager()->callsCnt = 0; main_manager()->bestPreviousScore = VALUE_INFINITE; main_manager()->bestPreviousAverageScore = VALUE_INFINITE; + main_manager()->originalPly = -1; main_manager()->previousTimeReduction = 1.0; main_manager()->tm.clear(); } diff --git a/src/timeman.cpp b/src/timeman.cpp index 4feb329b..f389e082 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -44,10 +44,8 @@ void TimeManagement::advance_nodes_time(std::int64_t nodes) { // the bounds of time allowed for the current game ply. 
We currently support: // 1) x basetime (+ z increment) // 2) x moves in y seconds (+ z increment) -void TimeManagement::init(Search::LimitsType& limits, - Color us, - int ply, - const OptionsMap& options) { +void TimeManagement::init( + Search::LimitsType& limits, Color us, int ply, const OptionsMap& options, int& originalPly) { TimePoint npmsec = TimePoint(options["nodestime"]); // If we have no time, we don't need to fully initialize TM. @@ -58,6 +56,9 @@ void TimeManagement::init(Search::LimitsType& limits, if (limits.time[us] == 0) return; + if (originalPly == -1) + originalPly = ply; + TimePoint moveOverhead = TimePoint(options["Move Overhead"]); // optScale is a percentage of available time to use for the current move. @@ -106,6 +107,8 @@ void TimeManagement::init(Search::LimitsType& limits, { // Use extra time with larger increments double optExtra = scaledInc < 500 ? 1.0 : 1.13; + if (ply - originalPly < 2) + optExtra *= 0.95; // Calculate time constants based on current time left. double logTimeInSec = std::log10(scaledTime / 1000.0); diff --git a/src/timeman.h b/src/timeman.h index 1b6bd849..8f1bb563 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -36,7 +36,8 @@ struct LimitsType; // the maximum available time, the game move number, and other parameters. 
class TimeManagement { public: - void init(Search::LimitsType& limits, Color us, int ply, const OptionsMap& options); + void init( + Search::LimitsType& limits, Color us, int ply, const OptionsMap& options, int& originalPly); TimePoint optimum() const; TimePoint maximum() const; From fa114266fa7ea996c6d2ef12c625547b1aefddc1 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Mon, 13 May 2024 14:08:19 +0300 Subject: [PATCH 019/315] Add extra bonus for high-depth condition Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 54208 W: 14058 L: 13717 D: 26433 Ptnml(0-2): 166, 6277, 13885, 6602, 174 https://tests.stockfishchess.org/tests/view/664136d8f9f4e8fc783c9b82 Passed LTC: LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 112548 W: 28492 L: 28018 D: 56038 Ptnml(0-2): 53, 12186, 31318, 12668, 49 https://tests.stockfishchess.org/tests/view/664143fef9f4e8fc783c9bf6 closes https://github.com/official-stockfish/Stockfish/pull/5242 Bench: 1725980 --- src/search.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index edbb58c6..30f718bd 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -739,7 +739,7 @@ Value Search::Worker::search( // Use static evaluation difference to improve quiet move ordering (~9 Elo) if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-12 * int((ss - 1)->staticEval + ss->staticEval), -1749, 1602); + int bonus = std::clamp(-12 * int((ss - 1)->staticEval + ss->staticEval), -1749, 1584); bonus = bonus > 0 ? 
2 * bonus : bonus / 2; thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) @@ -1328,8 +1328,8 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -14323) - + ((ss - 1)->moveCount > 10) + (!ss->inCheck && bestValue <= ss->staticEval - 127) + int bonus = (depth > 4) + (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -14323) + + ((ss - 1)->moveCount > 10) + (!ss->inCheck && bestValue <= ss->staticEval - 120) + (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 76); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus); From 9e45644c50e4650e4603ddef3e8147a8daf3a790 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Tue, 14 May 2024 20:10:01 +0300 Subject: [PATCH 020/315] Add extra bonus to pawn history for a move that caused a fail low Basically the same idea as for continuation/main history, but with some tweaks. 1) it uses a * 2 multiplier for the bonus instead of the full/half bonus - for whatever reason this seems to work better; 2) attempts with this type of big bonus scaled somewhat poorly (or were unlucky at longer time controls), but after measuring that the average value of pawn history in LMR increased by a substantial amount after adding these bonuses (for multiplier 1.5 it increased by roughly 400, relative to the 8192 cap), attempts were made to make the default pawn history negative to compensate for it - and the version with multiplier 2 and initial fill value -900 passed.
Passed STC: https://tests.stockfishchess.org/tests/view/66424815f9f4e8fc783cba59 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 115008 W: 30001 L: 29564 D: 55443 Ptnml(0-2): 432, 13629, 28903, 14150, 390 Passed LTC: https://tests.stockfishchess.org/tests/view/6642f5437134c82f3f7a3ffa LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 56448 W: 14432 L: 14067 D: 27949 Ptnml(0-2): 36, 6268, 15254, 6627, 39 Bench: 1857237 --- src/search.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 30f718bd..09a9cc92 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -496,7 +496,7 @@ void Search::Worker::clear() { counterMoves.fill(Move::none()); mainHistory.fill(0); captureHistory.fill(0); - pawnHistory.fill(0); + pawnHistory.fill(-900); correctionHistory.fill(0); for (bool inCheck : {false, true}) @@ -1335,6 +1335,11 @@ moves_loop: // When in check, search starts here stat_bonus(depth) * bonus); thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << stat_bonus(depth) * bonus / 2; + + + if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) + thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] + << stat_bonus(depth) * bonus * 2; } if (PvNode) From 09dba1f0806a973d1f9f4ebf04b7a45d81683168 Mon Sep 17 00:00:00 2001 From: mstembera Date: Mon, 13 May 2024 15:28:48 -0700 Subject: [PATCH 021/315] Call adjustEval with correct parameters after rescore Set smallNet to false after rescoring so we call adjustEval() w/ correct parameters. 
STC: https://tests.stockfishchess.org/tests/view/664308687134c82f3f7a4003 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 146912 W: 37856 L: 37756 D: 71300 Ptnml(0-2): 566, 17562, 37122, 17618, 588 LTC: https://tests.stockfishchess.org/tests/view/6643a0821f32a966da7485d6 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 390414 W: 98015 L: 98173 D: 194226 Ptnml(0-2): 162, 43555, 107929, 43401, 160 closes https://github.com/official-stockfish/Stockfish/pull/5244 Bench: 1819318 --- src/evaluate.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index b5f28d5a..de1adc98 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -63,7 +63,10 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 500)) - nnue = networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); + { + nnue = networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); + smallNet = false; + } const auto adjustEval = [&](int nnueDiv, int pawnCountMul, int evalDiv, int shufflingConstant) { // Blend optimism and eval with nnue complexity and material imbalance From 9b90cd88f0ddd568e43161a0ada7daf02fc59c67 Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Wed, 15 May 2024 04:10:58 +0200 Subject: [PATCH 022/315] Reduce more when improving and ttvalue is lower than alpha Reduce more if the position is improving and the value from the TT doesn't exceed alpha; the TT move itself is excluded from this extra reduction. This idea is based on the following LMR condition tuning https://tests.stockfishchess.org/tests/view/66423a1bf9f4e8fc783cba37 by using only three of the four largest terms P[3], P[18] and P[12].
Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 27840 W: 7309 L: 7004 D: 13527 Ptnml(0-2): 85, 3219, 7018, 3502, 96 https://tests.stockfishchess.org/tests/view/6643dc1cbc537f56194508ba Passed LTC: LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 191280 W: 48656 L: 48020 D: 94604 Ptnml(0-2): 78, 20979, 52903, 21589, 91 https://tests.stockfishchess.org/tests/view/6643e543bc537f5619451683 closes https://github.com/official-stockfish/Stockfish/pull/5245 Bench: 1430835 --- src/search.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index 09a9cc92..2dadd0dc 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1134,6 +1134,9 @@ moves_loop: // When in check, search starts here if (PvNode) r--; + if (improving && ttValue <= alpha && move != ttMove) + r++; + // Increase reduction if next ply has a lot of fail high (~5 Elo) if ((ss + 1)->cutoffCnt > 3) r++; From 1f3a0fda2e3a0d4aa825dd148c2593fb3631bf82 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Mon, 13 May 2024 18:06:38 -0400 Subject: [PATCH 023/315] Use same eval divisor for both nets Passed non-regression STC: https://tests.stockfishchess.org/tests/view/66428f146577e9d2c8a29cf8 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 241024 W: 62173 L: 62177 D: 116674 Ptnml(0-2): 904, 28648, 61407, 28654, 899 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/6643ae6f1f32a966da74977b LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 193710 W: 48762 L: 48717 D: 96231 Ptnml(0-2): 70, 21599, 53481, 21626, 79 closes https://github.com/official-stockfish/Stockfish/pull/5246 Bench: 1700680 --- src/evaluate.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index de1adc98..76d630dd 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -68,14 +68,13 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, smallNet = false; } - const auto adjustEval = [&](int nnueDiv, int pawnCountMul, int evalDiv, int shufflingConstant) 
{ + const auto adjustEval = [&](int nnueDiv, int pawnCountMul, int shufflingConstant) { // Blend optimism and eval with nnue complexity and material imbalance optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; nnue -= nnue * (nnueComplexity * 5 / 3) / nnueDiv; int npm = pos.non_pawn_material() / 64; - v = (nnue * (npm + 943 + pawnCountMul * pos.count()) + optimism * (npm + 140)) - / evalDiv; + v = (nnue * (npm + 943 + pawnCountMul * pos.count()) + optimism * (npm + 140)) / 1058; // Damp down the evaluation linearly when shuffling int shuffling = pos.rule50_count(); @@ -83,9 +82,9 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, }; if (!smallNet) - adjustEval(32395, 11, 1058, 178); + adjustEval(32395, 11, 178); else - adjustEval(32793, 9, 1067, 206); + adjustEval(32793, 9, 206); // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); From dcb02337844d71e56df57b9a8ba17646f953711c Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Wed, 15 May 2024 14:22:36 +0300 Subject: [PATCH 024/315] Simplifying improving and worsening deduction formulas Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 77696 W: 20052 L: 19878 D: 37766 Ptnml(0-2): 222, 9124, 19994, 9274, 234 https://tests.stockfishchess.org/tests/view/66440032bc537f561945171e Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 234414 W: 58874 L: 58871 D: 116669 Ptnml(0-2): 96, 26147, 64742, 26102, 120 https://tests.stockfishchess.org/tests/view/6644094cbc537f5619451735 closes https://github.com/official-stockfish/Stockfish/pull/5248 Bench: 1336738 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 2dadd0dc..d9041c66 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -60,8 +60,8 @@ static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, // Futility margin Value futility_margin(Depth d, bool 
noTtCutNode, bool improving, bool oppWorsening) { Value futilityMult = 131 - 48 * noTtCutNode; - Value improvingDeduction = 57 * improving * futilityMult / 32; - Value worseningDeduction = (309 + 52 * improving) * oppWorsening * futilityMult / 1024; + Value improvingDeduction = 2 * improving * futilityMult; + Value worseningDeduction = 330 * oppWorsening * futilityMult / 1024; return futilityMult * d - improvingDeduction - worseningDeduction; } From 541406ab9151891b3a42f49030a6167cfca55599 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Tue, 14 May 2024 16:51:02 -0400 Subject: [PATCH 025/315] Use same nnue divisor for both nets Passed non-regression STC: https://tests.stockfishchess.org/tests/view/6643ceeabc537f56194506f6 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 224800 W: 57910 L: 57896 D: 108994 Ptnml(0-2): 673, 26790, 57519, 26686, 732 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/6643ff15bc537f5619451719 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 347658 W: 87574 L: 87688 D: 172396 Ptnml(0-2): 207, 39004, 95488, 38956, 174 closes https://github.com/official-stockfish/Stockfish/pull/5250 Bench: 1804704 --- src/evaluate.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 76d630dd..3ce14862 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -68,10 +68,10 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, smallNet = false; } - const auto adjustEval = [&](int nnueDiv, int pawnCountMul, int shufflingConstant) { + const auto adjustEval = [&](int pawnCountMul, int shufflingConstant) { // Blend optimism and eval with nnue complexity and material imbalance optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; - nnue -= nnue * (nnueComplexity * 5 / 3) / nnueDiv; + nnue -= nnue * (nnueComplexity * 5 / 3) / 32395; int npm = pos.non_pawn_material() / 64; v = (nnue * (npm + 943 + pawnCountMul * pos.count()) + optimism * (npm + 140)) / 1058; @@ 
-82,9 +82,9 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, }; if (!smallNet) - adjustEval(32395, 11, 178); + adjustEval(11, 178); else - adjustEval(32793, 9, 206); + adjustEval(9, 206); // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); From e3c9ed77aa62e096d52bb558193279b804f53a84 Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Wed, 15 May 2024 22:32:55 +0800 Subject: [PATCH 026/315] Revert "Reduce more when improving and ttvalue is lower than alpha" The patch regressed significantly at longer time controls. Passed VLTC: https://tests.stockfishchess.org/tests/view/6644c7a2bc537f5619453096 LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 43336 W: 11177 L: 10884 D: 21275 Ptnml(0-2): 3, 4432, 12507, 4721, 5 Passed VVLTC: https://tests.stockfishchess.org/tests/view/66450c974aa4fa9a83b6d0b0 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 32394 W: 8350 L: 8072 D: 15972 Ptnml(0-2): 2, 2798, 10317, 3080, 0 closes https://github.com/official-stockfish/Stockfish/pull/5251 Bench: 1594188 --- src/search.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index d9041c66..bdcecd1c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1134,9 +1134,6 @@ moves_loop: // When in check, search starts here if (PvNode) r--; - if (improving && ttValue <= alpha && move != ttMove) - r++; - // Increase reduction if next ply has a lot of fail high (~5 Elo) if ((ss + 1)->cutoffCnt > 3) r++; From 47597641dc8da7c65d0f1d987f784af09d6aec15 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Wed, 15 May 2024 13:22:46 -0400 Subject: [PATCH 027/315] Lower smallnet threshold linearly as pawn count decreases Passed STC: https://tests.stockfishchess.org/tests/view/6644f677324e96f42f89d894 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 377920 W: 97135 L: 96322 D: 184463 Ptnml(0-2): 1044, 44259, 97588, 44978, 1091 Passed LTC: 
https://tests.stockfishchess.org/tests/view/664548af93ce6da3e93b31b3 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 169056 W: 42901 L: 42312 D: 83843 Ptnml(0-2): 58, 18538, 46753, 19115, 64 closes https://github.com/official-stockfish/Stockfish/pull/5252 Bench: 1991750 --- src/evaluate.cpp | 2 +- src/evaluate.h | 2 +- src/nnue/nnue_misc.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 3ce14862..498ec161 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -55,7 +55,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, assert(!pos.checkers()); int simpleEval = simple_eval(pos, pos.side_to_move()); - bool smallNet = std::abs(simpleEval) > SmallNetThreshold; + bool smallNet = std::abs(simpleEval) > SmallNetThreshold + 6 * pos.count(); int nnueComplexity; int v; diff --git a/src/evaluate.h b/src/evaluate.h index afaf35eb..6612ec9d 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -29,7 +29,7 @@ class Position; namespace Eval { -constexpr inline int SmallNetThreshold = 1174; +constexpr inline int SmallNetThreshold = 1126; // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. 
Do not change the diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp index bf73a58b..8a777912 100644 --- a/src/nnue/nnue_misc.cpp +++ b/src/nnue/nnue_misc.cpp @@ -47,7 +47,7 @@ void hint_common_parent_position(const Position& pos, AccumulatorCaches& caches) { int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move())); - if (simpleEvalAbs > Eval::SmallNetThreshold) + if (simpleEvalAbs > Eval::SmallNetThreshold + 6 * pos.count()) networks.small.hint_common_access(pos, &caches.small); else networks.big.hint_common_access(pos, &caches.big); From e0227a627288c786fdd3b12452303ff4eabba5b0 Mon Sep 17 00:00:00 2001 From: Rak Laptudirm Date: Wed, 15 May 2024 22:26:12 +0530 Subject: [PATCH 028/315] Improve comment closes https://github.com/official-stockfish/Stockfish/pull/5249 No functional change --- src/tt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tt.cpp b/src/tt.cpp index 4885a781..cb46fc8a 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -35,7 +35,7 @@ namespace Stockfish { void TTEntry::save( Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) { - // Preserve any existing move for the same position + // Preserve the old ttmove if we don't have a new one if (m || uint16_t(k) != key16) move16 = m; From 1b7dea3f851cd5c5411ba6f07a2f935bfb7da8a9 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Wed, 15 May 2024 19:26:48 -0400 Subject: [PATCH 029/315] Update default main net to nn-c721dfca8cd3.nnue Created by first retraining the spsa-tuned main net `nn-ae6a388e4a1a.nnue` with: - using v6-dd data without bestmove captures removed - addition of T80 mar2024 data - increasing loss by 20% when Q is too high - torch.compile changes for marginal training speed gains And then SPSA tuning weights of epoch 899 following methods described in: https://github.com/official-stockfish/Stockfish/pull/5149 This net was reached at 92k out of 120k steps in this 70+0.7 th 7 SPSA tuning run: 
https://tests.stockfishchess.org/tests/view/66413b7df9f4e8fc783c9bbb Thanks to @Viren6 for suggesting usage of: - c value 4 for the weights - c value 128 for the biases Scripts for automating applying fishtest spsa params to exporting tuned .nnue are in: https://github.com/linrock/nnue-tools/tree/master/spsa Before spsa tuning, epoch 899 was nn-f85738aefa84.nnue https://tests.stockfishchess.org/tests/view/663e5c893a2f9702074bc167 After initially training with max-epoch 800, training was resumed with max-epoch 1000. ``` experiment-name: 3072--S11--more-data-v6-dd-t80-mar2024--see-ge0-20p-more-loss-high-q-sk28-l8 nnue-pytorch-branch: linrock/nnue-pytorch/3072-r21-skip-more-wdl-see-ge0-20p-more-loss-high-q-torch-compile-more start-from-engine-test-net: False start-from-model: /data/config/apr2024-3072/nn-ae6a388e4a1a.nnue early-fen-skipping: 28 training-dataset: /data/S11-mar2024/: - leela96.v2.min.binpack - test60-2021-11-12-novdec-12tb7p.v6-dd.min.binpack - test78-2022-01-to-05-jantomay-16tb7p.v6-dd.min.binpack - test80-2022-06-jun-16tb7p.v6-dd.min.binpack - test80-2022-08-aug-16tb7p.v6-dd.min.binpack - test80-2022-09-sep-16tb7p.v6-dd.min.binpack - test80-2023-01-jan-16tb7p.v6-sk20.min.binpack - test80-2023-02-feb-16tb7p.v6-sk20.min.binpack - test80-2023-03-mar-2tb7p.v6-sk16.min.binpack - test80-2023-04-apr-2tb7p.v6-sk16.min.binpack - test80-2023-05-may-2tb7p.v6.min.binpack # https://github.com/official-stockfish/Stockfish/pull/4782 - test80-2023-06-jun-2tb7p.binpack - test80-2023-07-jul-2tb7p.binpack # https://github.com/official-stockfish/Stockfish/pull/4972 - test80-2023-08-aug-2tb7p.v6.min.binpack - test80-2023-09-sep-2tb7p.binpack - test80-2023-10-oct-2tb7p.binpack # S9 new data: https://github.com/official-stockfish/Stockfish/pull/5056 - test80-2023-11-nov-2tb7p.binpack - test80-2023-12-dec-2tb7p.binpack # S10 new data: https://github.com/official-stockfish/Stockfish/pull/5149 - test80-2024-01-jan-2tb7p.binpack - test80-2024-02-feb-2tb7p.binpack # S11 new data 
- test80-2024-03-mar-2tb7p.binpack /data/filt-v6-dd/: - test77-dec2021-16tb7p-filter-v6-dd.binpack - test78-juntosep2022-16tb7p-filter-v6-dd.binpack - test79-apr2022-16tb7p-filter-v6-dd.binpack - test79-may2022-16tb7p-filter-v6-dd.binpack - test80-jul2022-16tb7p-filter-v6-dd.binpack - test80-oct2022-16tb7p-filter-v6-dd.binpack - test80-nov2022-16tb7p-filter-v6-dd.binpack num-epochs: 1000 lr: 4.375e-4 gamma: 0.995 start-lambda: 0.8 end-lambda: 0.7 ``` Training data can be found at: https://robotmoon.com/nnue-training-data/ Local elo at 25k nodes per move: nn-epoch899.nnue : 4.6 +/- 1.4 Passed STC: https://tests.stockfishchess.org/tests/view/6645454893ce6da3e93b31ae LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 95232 W: 24598 L: 24194 D: 46440 Ptnml(0-2): 294, 11215, 24180, 11647, 280 Passed LTC: https://tests.stockfishchess.org/tests/view/6645522d93ce6da3e93b31df LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 320544 W: 81432 L: 80524 D: 158588 Ptnml(0-2): 164, 35659, 87696, 36611, 142 closes https://github.com/official-stockfish/Stockfish/pull/5254 bench 1995552 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index 6612ec9d..c87be53c 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -35,7 +35,7 @@ constexpr inline int SmallNetThreshold = 1126; // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro or the location where this macro is defined, as it is used // in the Makefile/Fishtest. 
-#define EvalFileDefaultNameBig "nn-ae6a388e4a1a.nnue" +#define EvalFileDefaultNameBig "nn-c721dfca8cd3.nnue" #define EvalFileDefaultNameSmall "nn-baff1ede1f90.nnue" namespace NNUE { From d92d1f31809afc8aa83cc14fcbd54b95258d09ad Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Thu, 16 May 2024 01:48:56 -0400 Subject: [PATCH 030/315] Move smallnet threshold logic into a function Now that the smallnet threshold is no longer a constant, use a function to organize it with other eval code. Passed non-regression STC: https://tests.stockfishchess.org/tests/view/66459fa093ce6da3e93b5ba2 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 217600 W: 56281 L: 56260 D: 105059 Ptnml(0-2): 756, 23787, 59729, 23736, 792 closes https://github.com/official-stockfish/Stockfish/pull/5255 No functional change --- src/evaluate.cpp | 6 +++++- src/evaluate.h | 3 +-- src/nnue/nnue_misc.cpp | 4 +--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 498ec161..09402b8b 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -44,6 +44,10 @@ int Eval::simple_eval(const Position& pos, Color c) { + (pos.non_pawn_material(c) - pos.non_pawn_material(~c)); } +bool Eval::use_smallnet(const Position& pos) { + int simpleEval = simple_eval(pos, pos.side_to_move()); + return std::abs(simpleEval) > 1126 + 6 * pos.count(); +} // Evaluate is the evaluator for the outer world. It returns a static evaluation // of the position from the point of view of the side to move. 
@@ -55,7 +59,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, assert(!pos.checkers()); int simpleEval = simple_eval(pos, pos.side_to_move()); - bool smallNet = std::abs(simpleEval) > SmallNetThreshold + 6 * pos.count(); + bool smallNet = use_smallnet(pos); int nnueComplexity; int v; diff --git a/src/evaluate.h b/src/evaluate.h index c87be53c..4b3e91ac 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -29,8 +29,6 @@ class Position; namespace Eval { -constexpr inline int SmallNetThreshold = 1126; - // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro or the location where this macro is defined, as it is used @@ -46,6 +44,7 @@ struct AccumulatorCaches; std::string trace(Position& pos, const Eval::NNUE::Networks& networks); int simple_eval(const Position& pos, Color c); +bool use_smallnet(const Position& pos); Value evaluate(const NNUE::Networks& networks, const Position& pos, Eval::NNUE::AccumulatorCaches& caches, diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp index 8a777912..a13c717c 100644 --- a/src/nnue/nnue_misc.cpp +++ b/src/nnue/nnue_misc.cpp @@ -45,9 +45,7 @@ constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); void hint_common_parent_position(const Position& pos, const Networks& networks, AccumulatorCaches& caches) { - - int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move())); - if (simpleEvalAbs > Eval::SmallNetThreshold + 6 * pos.count()) + if (Eval::use_smallnet(pos)) networks.small.hint_common_access(pos, &caches.small); else networks.big.hint_common_access(pos, &caches.big); From f5e15441b8e3b8087024d309313e8a4d6c48bba7 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sat, 18 May 2024 01:22:41 +0300 Subject: [PATCH 031/315] Early Exit in Bitboards::sliding_attack() The original code checks for occupancy within the loop condition.
By moving this check inside the loop and adding an early exit condition, we can avoid unnecessary iterations if a blocking piece is encountered. Passed stc: LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 127200 W: 33129 L: 32700 D: 61371 Ptnml(0-2): 424, 13243, 35826, 13694, 413 https://tests.stockfishchess.org/tests/view/664646006dcff0d1d6b05bca closes https://github.com/official-stockfish/Stockfish/pull/5256 No functional change --- src/bitboard.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/bitboard.cpp b/src/bitboard.cpp index 32c626d4..c842ca12 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -124,8 +124,14 @@ Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { for (Direction d : (pt == ROOK ? RookDirections : BishopDirections)) { Square s = sq; - while (safe_destination(s, d) && !(occupied & s)) + while (safe_destination(s, d)) + { attacks |= (s += d); + if (occupied & s) + { + break; + } + } } return attacks; From 285f1d2a663fb111f7124272403923eab4251982 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Wed, 15 May 2024 22:26:15 -0700 Subject: [PATCH 032/315] Tweak NMP Formula Passed STC: LLR: 2.99 (-2.94,2.94) <0.00,2.00> Total: 241728 W: 62440 L: 61811 D: 117477 Ptnml(0-2): 914, 28467, 61458, 29126, 899 https://tests.stockfishchess.org/tests/live_elo/6645992993ce6da3e93b5b99 Passed LTC: LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 167850 W: 42620 L: 42030 D: 83200 Ptnml(0-2): 82, 18412, 46354, 18988, 89 https://tests.stockfishchess.org/tests/live_elo/6647c5726dcff0d1d6b05dd3 closes https://github.com/official-stockfish/Stockfish/pull/5257 Bench: 1636018 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index bdcecd1c..06c6e198 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -787,7 +787,7 @@ Value Search::Worker::search( assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / 
144, 6) + depth / 3 + 4; + Depth R = std::min(int(eval - beta) / 144, 6) + depth / 3 + 5; ss->currentMove = Move::null(); ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; From 99dfc63e0321cb8544ce5455993df00a6c817ba3 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Fri, 17 May 2024 19:27:20 -0400 Subject: [PATCH 033/315] Use one nnue pawn count multiplier Switch to the value used by the main net. Passed non-regression STC: https://tests.stockfishchess.org/tests/view/6647e8096dcff0d1d6b05e96 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 51040 W: 13249 L: 13044 D: 24747 Ptnml(0-2): 139, 6029, 13016, 6160, 176 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/6647f4a46dcff0d1d6b05eea LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 20460 W: 5195 L: 4972 D: 10293 Ptnml(0-2): 8, 2178, 5637, 2397, 10 https://github.com/official-stockfish/Stockfish/pull/5258 bench 1887462 --- src/evaluate.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 09402b8b..abb04fcc 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -72,13 +72,13 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, smallNet = false; } - const auto adjustEval = [&](int pawnCountMul, int shufflingConstant) { + const auto adjustEval = [&](int shufflingConstant) { // Blend optimism and eval with nnue complexity and material imbalance optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; nnue -= nnue * (nnueComplexity * 5 / 3) / 32395; int npm = pos.non_pawn_material() / 64; - v = (nnue * (npm + 943 + pawnCountMul * pos.count()) + optimism * (npm + 140)) / 1058; + v = (nnue * (npm + 943 + 11 * pos.count()) + optimism * (npm + 140)) / 1058; // Damp down the evaluation linearly when shuffling int shuffling = pos.rule50_count(); @@ -86,9 +86,9 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, }; if (!smallNet) - adjustEval(11, 178); + adjustEval(178); else - 
adjustEval(9, 206); + adjustEval(206); // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); From 4edd1a389e4146a610098a841841f37f58980213 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Fri, 17 May 2024 17:45:09 -0700 Subject: [PATCH 034/315] Simplify Away Quadruple Extensions serendipitous gainer Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 95472 W: 24176 L: 24031 D: 47265 Ptnml(0-2): 52, 10533, 26414, 10692, 45 https://tests.stockfishchess.org/tests/live_elo/6647fa596dcff0d1d6b05efa Passed VVLTC 70+7 th 7: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 6772 W: 1793 L: 1583 D: 3396 Ptnml(0-2): 0, 502, 2172, 712, 0 https://tests.stockfishchess.org/tests/live_elo/6648277a6dcff0d1d6b05ffb Passed VVLTC 70+7 th 7 (2x): https://tests.stockfishchess.org/tests/view/66484c896dcff0d1d6b0619d LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 5424 W: 1469 L: 1254 D: 2701 Ptnml(0-2): 0, 394, 1710, 607, 1 closes https://github.com/official-stockfish/Stockfish/pull/5259 Bench: 1441794 --- src/search.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 06c6e198..2b95043f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1053,11 +1053,9 @@ moves_loop: // When in check, search starts here int doubleMargin = 285 * PvNode - 228 * !ttCapture; int tripleMargin = 121 + 238 * PvNode - 259 * !ttCapture + 117 * (ss->ttPv || !ttCapture); - int quadMargin = 471 + 343 * PvNode - 281 * !ttCapture + 217 * ss->ttPv; extension = 1 + (value < singularBeta - doubleMargin) - + (value < singularBeta - tripleMargin) - + (value < singularBeta - quadMargin); + + (value < singularBeta - tripleMargin); depth += ((!PvNode) && (depth < 14)); } From 2694fce928e5eec867d56d853b416c9f389c284d Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Fri, 17 May 2024 21:38:38 -0400 Subject: [PATCH 035/315] Simplify away adjustEval lambda Now that only the shuffling constant differs 
between nets, a lambda for adjusting eval is no longer needed. Passed non-regression STC: https://tests.stockfishchess.org/tests/view/664806ca6dcff0d1d6b05f34 LLR: 2.99 (-2.94,2.94) <-1.75,0.25> Total: 31552 W: 8175 L: 7959 D: 15418 Ptnml(0-2): 76, 3180, 9065, 3362, 93 closes https://github.com/official-stockfish/Stockfish/pull/5260 No functional change --- src/evaluate.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index abb04fcc..e5ebd45a 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -72,23 +72,15 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, smallNet = false; } - const auto adjustEval = [&](int shufflingConstant) { - // Blend optimism and eval with nnue complexity and material imbalance - optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; - nnue -= nnue * (nnueComplexity * 5 / 3) / 32395; + // Blend optimism and eval with nnue complexity and material imbalance + optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; + nnue -= nnue * (nnueComplexity * 5 / 3) / 32395; - int npm = pos.non_pawn_material() / 64; - v = (nnue * (npm + 943 + 11 * pos.count()) + optimism * (npm + 140)) / 1058; + int npm = pos.non_pawn_material() / 64; + v = (nnue * (npm + 943 + 11 * pos.count()) + optimism * (npm + 140)) / 1058; - // Damp down the evaluation linearly when shuffling - int shuffling = pos.rule50_count(); - v = v * (shufflingConstant - shuffling) / 207; - }; - - if (!smallNet) - adjustEval(178); - else - adjustEval(206); + // Damp down the evaluation linearly when shuffling + v = v * ((smallNet ? 
206 : 178) - pos.rule50_count()) / 207; // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); From 99f1bacfd6864afca86ae74f33232b9cdfb3828c Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Sat, 18 May 2024 22:15:41 +0800 Subject: [PATCH 036/315] VVLTC search tune Tuned with 85k games at VVLTC. VVLTC 1st sprt: https://tests.stockfishchess.org/tests/view/6648b836308cceea45533ad7 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 14880 W: 3890 L: 3652 D: 7338 Ptnml(0-2): 0, 1255, 4694, 1489, 2 VVLTC 2nd sprt: https://tests.stockfishchess.org/tests/view/6648c34f308cceea45533b4f LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 24984 W: 6502 L: 6235 D: 12247 Ptnml(0-2): 1, 2178, 7867, 2445, 1 closes https://github.com/official-stockfish/Stockfish/pull/5264 Bench: 1198142 --- src/search.cpp | 84 +++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 2b95043f..54990ce6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -59,9 +59,9 @@ static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { - Value futilityMult = 131 - 48 * noTtCutNode; - Value improvingDeduction = 2 * improving * futilityMult; - Value worseningDeduction = 330 * oppWorsening * futilityMult / 1024; + Value futilityMult = 127 - 48 * noTtCutNode; + Value improvingDeduction = 65 * improving * futilityMult / 32; + Value worseningDeduction = 334 * oppWorsening * futilityMult / 1024; return futilityMult * d - improvingDeduction - worseningDeduction; } @@ -73,15 +73,15 @@ constexpr int futility_move_count(bool improving, Depth depth) { // Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range Value to_corrected_static_eval(Value v, const 
Worker& w, const Position& pos) { auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; - v += cv * std::abs(cv) / 7179; + v += cv * std::abs(cv) / 6047; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::clamp(200 * d - 280, 16, 1495); } +int stat_bonus(Depth d) { return std::clamp(187 * d - 288, 17, 1548); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return (d < 4 ? 586 * d - 284 : 1639); } +int stat_malus(Depth d) { return (d < 4 ? 630 * d - 281 : 1741); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } @@ -311,12 +311,12 @@ void Search::Worker::iterative_deepening() { // Reset aspiration window starting size Value avg = rootMoves[pvIdx].averageScore; - delta = 10 + avg * avg / 9474; + delta = 10 + avg * avg / 9828; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore (~4 Elo) - optimism[us] = 117 * avg / (std::abs(avg) + 88); + optimism[us] = 116 * avg / (std::abs(avg) + 84); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail @@ -503,10 +503,10 @@ void Search::Worker::clear() { for (StatsType c : {NoCaptures, Captures}) for (auto& to : continuationHistory[inCheck][c]) for (auto& h : to) - h->fill(-62); + h->fill(-60); for (size_t i = 1; i < reductions.size(); ++i) - reductions[i] = int((21.19 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + reductions[i] = int((21.69 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); refreshTable.clear(networks); } @@ -739,7 +739,7 @@ Value Search::Worker::search( // Use static evaluation difference to improve quiet move ordering (~9 Elo) if (((ss - 1)->currentMove).is_ok() && !(ss - 
1)->inCheck && !priorCapture) { - int bonus = std::clamp(-12 * int((ss - 1)->staticEval + ss->staticEval), -1749, 1584); + int bonus = std::clamp(-11 * int((ss - 1)->staticEval + ss->staticEval), -1729, 1517); bonus = bonus > 0 ? 2 * bonus : bonus / 2; thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) @@ -762,7 +762,7 @@ Value Search::Worker::search( // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. // Adjust razor margin according to cutoffCnt. (~1 Elo) - if (eval < alpha - 473 - (308 - 138 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) + if (eval < alpha - 474 - (326 - 139 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) @@ -773,21 +773,21 @@ Value Search::Worker::search( // The depth condition is important for mate finding. if (!ss->ttPv && depth < 11 && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - - (ss - 1)->statScore / 258 + - (ss - 1)->statScore / 252 >= beta && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) return beta > VALUE_TB_LOSS_IN_MAX_PLY ? (eval + beta) / 2 : eval; // Step 9. 
Null move search with verification search (~35 Elo) - if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 16079 - && eval >= beta && ss->staticEval >= beta - 21 * depth + 324 && !excludedMove + if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 15246 + && eval >= beta && ss->staticEval >= beta - 21 * depth + 366 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / 144, 6) + depth / 3 + 5; + Depth R = std::min(int(eval - beta) / 152, 6) + depth / 3 + 5; ss->currentMove = Move::null(); ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -835,7 +835,7 @@ Value Search::Worker::search( // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. - probCutBeta = beta + 177 - 65 * improving; + probCutBeta = beta + 176 - 65 * improving; if ( !PvNode && depth > 3 && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY @@ -891,7 +891,7 @@ Value Search::Worker::search( moves_loop: // When in check, search starts here // Step 12. 
A small Probcut idea, when we are in check (~4 Elo) - probCutBeta = beta + 428; + probCutBeta = beta + 440; if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 4 && ttValue >= probCutBeta && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) @@ -975,15 +975,15 @@ moves_loop: // When in check, search starts here // Futility pruning for captures (~2 Elo) if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { - Value futilityValue = ss->staticEval + 305 + 272 * lmrDepth + Value futilityValue = ss->staticEval + 276 + 256 * lmrDepth + PieceValue[capturedPiece] + captHist / 7; if (futilityValue <= alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - int seeHist = std::clamp(captHist / 32, -185 * depth, 182 * depth); - if (!pos.see_ge(move, -176 * depth - seeHist)) + int seeHist = std::clamp(captHist / 32, -177 * depth, 175 * depth); + if (!pos.see_ge(move, -183 * depth - seeHist)) continue; } else @@ -994,18 +994,18 @@ moves_loop: // When in check, search starts here + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) - if (lmrDepth < 6 && history < -4360 * depth) + if (lmrDepth < 6 && history < -4076 * depth) continue; history += 2 * thisThread->mainHistory[us][move.from_to()]; - lmrDepth += history / 4507; + lmrDepth += history / 4401; Value futilityValue = - ss->staticEval + (bestValue < ss->staticEval - 54 ? 142 : 55) + 132 * lmrDepth; + ss->staticEval + (bestValue < ss->staticEval - 53 ? 
151 : 57) + 140 * lmrDepth; // Futility pruning: parent node (~13 Elo) - if (!ss->inCheck && lmrDepth < 11 && futilityValue <= alpha) + if (!ss->inCheck && lmrDepth < 10 && futilityValue <= alpha) { if (bestValue <= futilityValue && std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && futilityValue < VALUE_TB_WIN_IN_MAX_PLY) @@ -1016,7 +1016,7 @@ moves_loop: // When in check, search starts here lmrDepth = std::max(lmrDepth, 0); // Prune moves with negative SEE (~4 Elo) - if (!pos.see_ge(move, -27 * lmrDepth * lmrDepth)) + if (!pos.see_ge(move, -26 * lmrDepth * lmrDepth)) continue; } } @@ -1036,11 +1036,11 @@ moves_loop: // When in check, search starts here // so changing them requires tests at these types of time controls. // Recursive singular search is avoided. if (!rootNode && move == ttMove && !excludedMove - && depth >= 4 - (thisThread->completedDepth > 33) + ss->ttPv + && depth >= 4 - (thisThread->completedDepth > 35) + ss->ttPv && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - (59 + 49 * (ss->ttPv && !PvNode)) * depth / 64; + Value singularBeta = ttValue - (57 + 50 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1050,14 +1050,14 @@ moves_loop: // When in check, search starts here if (value < singularBeta) { - int doubleMargin = 285 * PvNode - 228 * !ttCapture; + int doubleMargin = 298 * PvNode - 209 * !ttCapture; int tripleMargin = - 121 + 238 * PvNode - 259 * !ttCapture + 117 * (ss->ttPv || !ttCapture); + 117 + 252 * PvNode - 270 * !ttCapture + 111 * (ss->ttPv || !ttCapture); extension = 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin); - depth += ((!PvNode) && (depth < 14)); + depth += ((!PvNode) && (depth < 15)); } // Multi-cut pruning @@ -1092,7 +1092,7 @@ moves_loop: // When in check, search starts here else if (PvNode && move == ttMove && move.to_sq() == prevSq && 
thisThread->captureHistory[movedPiece][move.to_sq()] [type_of(pos.piece_on(move.to_sq()))] - > 4041) + > 3748) extension = 1; } @@ -1143,10 +1143,10 @@ moves_loop: // When in check, search starts here ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - 5313; + + (*contHist[1])[movedPiece][move.to_sq()] - 5266; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / (16145 - std::min(depth, 15) * 102); + r -= ss->statScore / (14519 - std::min(depth, 15) * 103); // Step 17. Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1 + rootNode) @@ -1165,7 +1165,7 @@ moves_loop: // When in check, search starts here { // Adjust full-depth search based on LMR results - if the result // was good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = value > (bestValue + 41 + 2 * newDepth); // (~1 Elo) + const bool doDeeperSearch = value > (bestValue + 40 + 2 * newDepth); // (~1 Elo) const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo) newDepth += doDeeperSearch - doShallowerSearch; @@ -1326,9 +1326,9 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (depth > 4) + (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -14323) - + ((ss - 1)->moveCount > 10) + (!ss->inCheck && bestValue <= ss->staticEval - 120) - + (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 76); + int bonus = (depth > 4) + (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -13241) + + ((ss - 1)->moveCount > 10) + (!ss->inCheck && bestValue <= ss->staticEval - 127) + + (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 74); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus); 
thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] @@ -1494,7 +1494,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, if (bestValue > alpha) alpha = bestValue; - futilityBase = ss->staticEval + 259; + futilityBase = ss->staticEval + 264; } const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -1566,11 +1566,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)] [move.to_sq()] - <= 4057) + <= 4348) continue; // Do not search moves with bad enough SEE values (~5 Elo) - if (!pos.see_ge(move, -68)) + if (!pos.see_ge(move, -63)) continue; } @@ -1636,7 +1636,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) { int reductionScale = reductions[d] * reductions[mn]; - return (reductionScale + 1284 - delta * 755 / rootDelta) / 1024 + (!i && reductionScale > 1133); + return (reductionScale + 1147 - delta * 755 / rootDelta) / 1024 + (!i && reductionScale > 1125); } // elapsed() returns the time elapsed since the search started. If the From 2d3258162387bf38551962bf2c9dd1d47e72b4dd Mon Sep 17 00:00:00 2001 From: Viren6 <94880762+Viren6@users.noreply.github.com> Date: Sun, 19 May 2024 01:40:29 +0100 Subject: [PATCH 037/315] Revert "Simplify Away Quadruple Extensions" This reverts commit 4edd1a3 The unusual result of (combined) +12.0 +- 3.7 in the 2 VVLTC simplification SPRTs ran was the result of base having only 64MB of hash instead of 512MB (Asymmetric hash). Vizvezdenec was the one to notice this. 
closes https://github.com/official-stockfish/Stockfish/pull/5265 bench 1404295 Co-Authored-By: Michael Chaly <26898827+Vizvezdenec@users.noreply.github.com> --- src/search.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 54990ce6..cbd454ef 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1053,9 +1053,11 @@ moves_loop: // When in check, search starts here int doubleMargin = 298 * PvNode - 209 * !ttCapture; int tripleMargin = 117 + 252 * PvNode - 270 * !ttCapture + 111 * (ss->ttPv || !ttCapture); + int quadMargin = 471 + 343 * PvNode - 281 * !ttCapture + 217 * ss->ttPv; extension = 1 + (value < singularBeta - doubleMargin) - + (value < singularBeta - tripleMargin); + + (value < singularBeta - tripleMargin) + + (value < singularBeta - quadMargin); depth += ((!PvNode) && (depth < 15)); } From 27eb49a2211c90650ef64d5102e6e36ca5e69af0 Mon Sep 17 00:00:00 2001 From: cj5716 <125858804+cj5716@users.noreply.github.com> Date: Fri, 17 May 2024 18:05:12 +0800 Subject: [PATCH 038/315] Simplify ClippedReLU Removes some max calls Some speedup stats, courtesy of @AndyGrant (albeit measured in an alternate implementation) Dev 749240 nps Base 748495 nps Gain 0.100% 289936 games STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 203040 W: 52213 L: 52179 D: 98648 Ptnml(0-2): 480, 20722, 59139, 20642, 537 https://tests.stockfishchess.org/tests/view/664805fe6dcff0d1d6b05f2c closes #5261 No functional change --- src/nnue/layers/clipped_relu.h | 48 ++++++++++++++++------------------ src/nnue/nnue_misc.cpp | 9 +++---- src/tune.cpp | 6 ++--- src/uci.cpp | 6 ++--- 4 files changed, 30 insertions(+), 39 deletions(-) diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index 813234c5..2ee378ad 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -65,41 +65,37 @@ class ClippedReLU { if constexpr (InputDimensions % SimdWidth == 0) { constexpr IndexType NumChunks = InputDimensions / 
SimdWidth; - const __m256i Zero = _mm256_setzero_si256(); const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); const auto in = reinterpret_cast(input); const auto out = reinterpret_cast<__m256i*>(output); for (IndexType i = 0; i < NumChunks; ++i) { const __m256i words0 = - _mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 0]), - _mm256_load_si256(&in[i * 4 + 1])), + _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 0]), + _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits); const __m256i words1 = - _mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 2]), - _mm256_load_si256(&in[i * 4 + 3])), + _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 2]), + _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits); - _mm256_store_si256( - &out[i], _mm256_permutevar8x32_epi32( - _mm256_max_epi8(_mm256_packs_epi16(words0, words1), Zero), Offsets)); + _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32( + _mm256_packs_epi16(words0, words1), Offsets)); } } else { constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); - const __m128i Zero = _mm_setzero_si128(); const auto in = reinterpret_cast(input); const auto out = reinterpret_cast<__m128i*>(output); for (IndexType i = 0; i < NumChunks; ++i) { - const __m128i words0 = _mm_srai_epi16( - _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), + const __m128i words0 = _mm_srli_epi16( + _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); - const __m128i words1 = _mm_srai_epi16( - _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), + const __m128i words1 = _mm_srli_epi16( + _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); - const __m128i packedbytes = _mm_packs_epi16(words0, words1); - _mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero)); + _mm_store_si128(&out[i], 
_mm_packs_epi16(words0, words1)); } } constexpr IndexType Start = InputDimensions % SimdWidth == 0 @@ -109,9 +105,7 @@ class ClippedReLU { #elif defined(USE_SSE2) constexpr IndexType NumChunks = InputDimensions / SimdWidth; - #ifdef USE_SSE41 - const __m128i Zero = _mm_setzero_si128(); - #else + #ifndef USE_SSE41 const __m128i k0x80s = _mm_set1_epi8(-128); #endif @@ -119,6 +113,15 @@ class ClippedReLU { const auto out = reinterpret_cast<__m128i*>(output); for (IndexType i = 0; i < NumChunks; ++i) { + #if defined(USE_SSE41) + const __m128i words0 = _mm_srli_epi16( + _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), + WeightScaleBits); + const __m128i words1 = _mm_srli_epi16( + _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), + WeightScaleBits); + _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); + #else const __m128i words0 = _mm_srai_epi16( _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); @@ -126,15 +129,8 @@ class ClippedReLU { _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); const __m128i packedbytes = _mm_packs_epi16(words0, words1); - _mm_store_si128(&out[i], - - #ifdef USE_SSE41 - _mm_max_epi8(packedbytes, Zero) - #else - _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + _mm_store_si128(&out[i], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)); #endif - - ); } constexpr IndexType Start = NumChunks * SimdWidth; diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp index a13c717c..b54bbaba 100644 --- a/src/nnue/nnue_misc.cpp +++ b/src/nnue/nnue_misc.cpp @@ -178,14 +178,11 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat ss << "| " << bucket << " "; ss << " | "; format_cp_aligned_dot(t.psqt[bucket], ss, pos); - ss << " " - << " | "; + ss << " " << " | "; format_cp_aligned_dot(t.positional[bucket], ss, pos); - ss << " " - << " | "; 
+ ss << " " << " | "; format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos); - ss << " " - << " |"; + ss << " " << " |"; if (bucket == t.correctBucket) ss << " <-- this bucket is used"; ss << '\n'; diff --git a/src/tune.cpp b/src/tune.cpp index 3e5ebe5e..84f59524 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -59,8 +59,7 @@ void make_option(OptionsMap* options, const string& n, int v, const SetRange& r) // Print formatted parameters, ready to be copy-pasted in Fishtest std::cout << n << "," << v << "," << r(v).first << "," << r(v).second << "," - << (r(v).second - r(v).first) / 20.0 << "," - << "0.0020" << std::endl; + << (r(v).second - r(v).first) / 20.0 << "," << "0.0020" << std::endl; } } @@ -118,7 +117,6 @@ void Tune::Entry::read_option() { namespace Stockfish { -void Tune::read_results() { /* ...insert your values here... */ -} +void Tune::read_results() { /* ...insert your values here... */ } } // namespace Stockfish diff --git a/src/uci.cpp b/src/uci.cpp index cb686a02..cb9d7b08 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -286,9 +286,9 @@ void UCIEngine::bench(std::istream& args) { dbg_print(); - std::cerr << "\n===========================" - << "\nTotal time (ms) : " << elapsed << "\nNodes searched : " << nodes - << "\nNodes/second : " << 1000 * nodes / elapsed << std::endl; + std::cerr << "\n===========================" << "\nTotal time (ms) : " << elapsed + << "\nNodes searched : " << nodes << "\nNodes/second : " << 1000 * nodes / elapsed + << std::endl; // reset callback, to not capture a dangling reference to nodesSearched engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); From a3bb7e626d1489bbbcc16014b16065849ec786b5 Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Sun, 19 May 2024 10:12:05 +0200 Subject: [PATCH 039/315] Tweak continuation history bonus dependent on ply. 
This patch is based on the following tuning https://tests.stockfishchess.org/tests/view/6648b2eb308cceea45533abe using only the tuned factors for the continuation history. Passed STC: LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 99904 W: 25865 L: 25457 D: 48582 Ptnml(0-2): 281, 11705, 25578, 12101, 287 https://tests.stockfishchess.org/tests/view/6648c136308cceea45533af8 Passed LTC: LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 36402 W: 9362 L: 9039 D: 18001 Ptnml(0-2): 20, 3952, 9951, 4241, 37 https://tests.stockfishchess.org/tests/view/6648ee3cb8fa20e74c39f3fd closes https://github.com/official-stockfish/Stockfish/pull/5267 Bench: 1917762 --- src/search.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index cbd454ef..5b9c9bb0 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1779,6 +1779,8 @@ void update_all_stats(const Position& pos, // by moves at ply -1, -2, -3, -4, and -6 with current move. void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { + bonus = bonus * (112 * ss->ply + 136) / (159 * ss->ply + 124); + for (int i : {1, 2, 3, 4, 6}) { // Only update the first 2 continuation histories if we are in check From 4a66a7c9caeca70ea8cd4527de7ec1e839b6cf46 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sun, 19 May 2024 18:48:43 +0300 Subject: [PATCH 040/315] Do more aggressive pawn history updates Tweak of a recent patch that made pawn history update for the move that caused a fail low and set its default value to -900. This patch makes it more aggressive: updates are twice as large and the default value is -1100.
Passed STC: https://tests.stockfishchess.org/tests/view/6648c5d4308cceea45533b5d LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 235200 W: 61090 L: 60476 D: 113634 Ptnml(0-2): 763, 27952, 59651, 28376, 858 Passed LTC: https://tests.stockfishchess.org/tests/view/664a1008ae57c1758ac5b523 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 20076 W: 5193 L: 4908 D: 9975 Ptnml(0-2): 7, 2105, 5534, 2380, 12 closes https://github.com/official-stockfish/Stockfish/pull/5268 Bench: 1590474 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5b9c9bb0..2618b984 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -496,7 +496,7 @@ void Search::Worker::clear() { counterMoves.fill(Move::none()); mainHistory.fill(0); captureHistory.fill(0); - pawnHistory.fill(-900); + pawnHistory.fill(-1100); correctionHistory.fill(0); for (bool inCheck : {false, true}) @@ -1339,7 +1339,7 @@ moves_loop: // When in check, search starts here if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] - << stat_bonus(depth) * bonus * 2; + << stat_bonus(depth) * bonus * 4; } if (PvNode) From 81e21a69f02164fd988d5636a47c8790a1174b81 Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Sun, 19 May 2024 10:12:05 +0200 Subject: [PATCH 041/315] Simplify the recently introduced ply-based cmh bonus factor. Replace it with a constant which is an approximation of the limit of the factor. 
STC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 120064 W: 30967 L: 30836 D: 58261 Ptnml(0-2): 421, 14238, 30608, 14319, 446 https://tests.stockfishchess.org/tests/view/6649d146b8fa20e74c39f4ad LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 53856 W: 13719 L: 13530 D: 26607 Ptnml(0-2): 31, 5879, 14922, 6062, 34 https://tests.stockfishchess.org/tests/view/664a027fae57c1758ac5b4ee closes https://github.com/official-stockfish/Stockfish/pull/5270 Bench: 1355618 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 2618b984..7e95dd87 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1779,7 +1779,7 @@ void update_all_stats(const Position& pos, // by moves at ply -1, -2, -3, -4, and -6 with current move. void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - bonus = bonus * (112 * ss->ply + 136) / (159 * ss->ply + 124); + bonus = bonus * 45 / 64; for (int i : {1, 2, 3, 4, 6}) { From 4d88a63e607f44e59b9cc56b45984937e5eb123c Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sun, 19 May 2024 14:01:49 -0400 Subject: [PATCH 042/315] Re-eval only if smallnet output flips from simple eval Recent attempts to change the smallnet nnue re-eval threshold did not show much elo difference: https://tests.stockfishchess.org/tests/view/664a29bb25a9058c4d21d53c https://tests.stockfishchess.org/tests/view/664a299925a9058c4d21d53a Passed non-regression STC: https://tests.stockfishchess.org/tests/view/664a3ea95fc7b70b8817aee2 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 22304 W: 5905 L: 5664 D: 10735 Ptnml(0-2): 67, 2602, 5603, 2783, 97 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/664a43d35fc7b70b8817aef4 LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 37536 W: 9667 L: 9460 D: 18409 Ptnml(0-2): 25, 4090, 10321, 4317, 15 closes https://github.com/official-stockfish/Stockfish/pull/5271 bench 1287409 --- src/evaluate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/evaluate.cpp b/src/evaluate.cpp index e5ebd45a..2cf82eaf 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -66,7 +66,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, Value nnue = smallNet ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity) : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); - if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 500)) + if (smallNet && nnue * simpleEval < 0) { nnue = networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); smallNet = false; From 0c797367a3a9783ff87422d543eb2106fea3e948 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Mon, 20 May 2024 03:22:40 +0300 Subject: [PATCH 043/315] Update correction history in case of successful null move pruning Since null move pruning uses the same position it makes some sense to try to update correction history there in case of fail high. Update value is 4 times less than normal update. Passed STC: https://tests.stockfishchess.org/tests/view/664a011cae57c1758ac5b4dd LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 419360 W: 108390 L: 107505 D: 203465 Ptnml(0-2): 1416, 49603, 106724, 50554, 1383 Passed LTC: https://tests.stockfishchess.org/tests/view/664a53d95fc7b70b8817c65b LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 193518 W: 49076 L: 48434 D: 96008 Ptnml(0-2): 89, 21335, 53263, 21989, 83 closes https://github.com/official-stockfish/Stockfish/pull/5272 bench 1301487 --- src/nnue/nnue_misc.cpp | 9 ++++++--- src/search.cpp | 9 +++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp index b54bbaba..a13c717c 100644 --- a/src/nnue/nnue_misc.cpp +++ b/src/nnue/nnue_misc.cpp @@ -178,11 +178,14 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat ss << "| " << bucket << " "; ss << " | "; format_cp_aligned_dot(t.psqt[bucket], ss, pos); - ss << " " << " | "; + ss << " " + << " | "; format_cp_aligned_dot(t.positional[bucket], ss, pos); 
- ss << " " << " | "; + ss << " " + << " | "; format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos); - ss << " " << " |"; + ss << " " + << " |"; if (bucket == t.correctBucket) ss << " <-- this bucket is used"; ss << '\n'; diff --git a/src/search.cpp b/src/search.cpp index 7e95dd87..2ed5d97b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -802,7 +802,16 @@ Value Search::Worker::search( if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY) { if (thisThread->nmpMinPly || depth < 16) + { + if (nullValue >= ss->staticEval) + { + auto bonus = std::min(int(nullValue - ss->staticEval) * depth / 32, + CORRECTION_HISTORY_LIMIT / 16); + thisThread->correctionHistory[us][pawn_structure_index(pos)] + << bonus; + } return nullValue; + } assert(!thisThread->nmpMinPly); // Recursive verification is not allowed From b8ccaf038a21effba4613dca95f30eb1bc3d77b9 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Mon, 20 May 2024 03:14:00 +0300 Subject: [PATCH 044/315] Use same shuffling Constant for both nets Passed STC: https://tests.stockfishchess.org/tests/view/664a42b15fc7b70b8817aeef LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 87840 W: 22759 L: 22594 D: 42487 Ptnml(0-2): 335, 10351, 22324, 10634, 276 Passed LTC: https://tests.stockfishchess.org/tests/view/664a46995fc7b70b8817af02 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 163122 W: 41443 L: 41367 D: 80312 Ptnml(0-2): 105, 18154, 44927, 18310, 65 closes https://github.com/official-stockfish/Stockfish/pull/5273 bench: 1190174 --- src/evaluate.cpp | 2 +- src/tune.cpp | 6 ++++-- src/uci.cpp | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 2cf82eaf..3a24657f 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -80,7 +80,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, v = (nnue * (npm + 943 + 11 * pos.count()) + optimism * (npm + 140)) / 1058; // Damp down the evaluation linearly when shuffling - v = v * ((smallNet ? 
206 : 178) - pos.rule50_count()) / 207; + v = v * (204 - pos.rule50_count()) / 208; // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); diff --git a/src/tune.cpp b/src/tune.cpp index 84f59524..3e5ebe5e 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -59,7 +59,8 @@ void make_option(OptionsMap* options, const string& n, int v, const SetRange& r) // Print formatted parameters, ready to be copy-pasted in Fishtest std::cout << n << "," << v << "," << r(v).first << "," << r(v).second << "," - << (r(v).second - r(v).first) / 20.0 << "," << "0.0020" << std::endl; + << (r(v).second - r(v).first) / 20.0 << "," + << "0.0020" << std::endl; } } @@ -117,6 +118,7 @@ void Tune::Entry::read_option() { namespace Stockfish { -void Tune::read_results() { /* ...insert your values here... */ } +void Tune::read_results() { /* ...insert your values here... */ +} } // namespace Stockfish diff --git a/src/uci.cpp b/src/uci.cpp index cb9d7b08..cb686a02 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -286,9 +286,9 @@ void UCIEngine::bench(std::istream& args) { dbg_print(); - std::cerr << "\n===========================" << "\nTotal time (ms) : " << elapsed - << "\nNodes searched : " << nodes << "\nNodes/second : " << 1000 * nodes / elapsed - << std::endl; + std::cerr << "\n===========================" + << "\nTotal time (ms) : " << elapsed << "\nNodes searched : " << nodes + << "\nNodes/second : " << 1000 * nodes / elapsed << std::endl; // reset callback, to not capture a dangling reference to nodesSearched engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); From daf9787de197ce9e5478f3e7ceec8c64cb3d549a Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Tue, 21 May 2024 00:40:55 +0300 Subject: [PATCH 045/315] Rescale pawn history updates This patch is somewhat of a continuation of recent pawn history gainers. It makes pawn history updates after search twice smaller. 
Since on average these updates make pawn history more negative, the offset is changed to a lower value so that the average value remains approximately the same. https://tests.stockfishchess.org/tests/view/664b3af9830eb9f886614aab Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 170464 W: 44239 L: 43724 D: 82501 Ptnml(0-2): 523, 20278, 43128, 20767, 536 Passed LTC against pending PR: https://tests.stockfishchess.org/tests/view/664b8c58830eb9f886614b64 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 98178 W: 25015 L: 24569 D: 48594 Ptnml(0-2): 48, 10769, 27005, 11223, 44 closes https://github.com/official-stockfish/Stockfish/pull/5275 Bench: 1343175 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 2ed5d97b..4c5e521e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -496,7 +496,7 @@ void Search::Worker::clear() { counterMoves.fill(Move::none()); mainHistory.fill(0); captureHistory.fill(0); - pawnHistory.fill(-1100); + pawnHistory.fill(-1300); correctionHistory.fill(0); for (bool inCheck : {false, true}) @@ -1827,7 +1827,7 @@ void update_quiet_histories( update_continuation_histories(ss, pos.moved_piece(move), move.to_sq(), bonus); int pIndex = pawn_structure_index(pos); - workerThread.pawnHistory[pIndex][pos.moved_piece(move)][move.to_sq()] << bonus; + workerThread.pawnHistory[pIndex][pos.moved_piece(move)][move.to_sq()] << bonus / 2; } // Updates move sorting heuristics From f27a9be29c74b1d12babeb8a06ee992a22d67c9a Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sat, 18 May 2024 03:19:36 -0700 Subject: [PATCH 046/315] Reduce When TTValue is Above Alpha Passed STC: LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 53376 W: 13818 L: 13476 D: 26082 Ptnml(0-2): 156, 6212, 13626, 6522, 172 https://tests.stockfishchess.org/tests/view/664aa261830eb9f8866145e5 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 393444 W: 100096 L: 99042 D: 194306 Ptnml(0-2): 191, 43516, 108248, 44582, 185
https://tests.stockfishchess.org/tests/view/664ab54f830eb9f88661463c closes https://github.com/official-stockfish/Stockfish/pull/5276 Bench: 1024562 --- src/search.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 4c5e521e..2817247d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -42,6 +42,7 @@ #include "thread.h" #include "timeman.h" #include "tt.h" +#include "types.h" #include "uci.h" #include "ucioption.h" @@ -833,9 +834,12 @@ Value Search::Worker::search( if (PvNode && !ttMove) depth -= 3; + if (!PvNode && ss->ttHit && (tte->bound() & BOUND_UPPER) && ttValue > alpha + 5 * depth) + depth--; + // Use qsearch if depth <= 0. if (depth <= 0) - return qsearch(pos, ss, alpha, beta); + return qsearch < PvNode ? PV : NonPV > (pos, ss, alpha, beta); // For cutNodes without a ttMove, we decrease depth by 2 if depth is high enough. if (cutNode && depth >= 8 && (!ttMove || tte->bound() == BOUND_UPPER)) From 87bad0c38a2b6a654850e61127dc0667a49acf82 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Tue, 21 May 2024 02:19:54 +0300 Subject: [PATCH 047/315] Refine Evaluation Scaling with Piece-Specific Weights Refine Evaluation Scaling with Piece-Specific Weights, instead of the simplified npm method. I took the initial idea from Viren6 , as he worked on it in September of last year. I worked on it, and tuned it, and now it passed both tests. 
Passed STC: LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 95712 W: 24731 L: 24325 D: 46656 Ptnml(0-2): 363, 11152, 24357, 11684, 300 https://tests.stockfishchess.org/tests/view/664b5493830eb9f886614af3 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 204480 W: 52167 L: 51501 D: 100812 Ptnml(0-2): 114, 22579, 56166, 23289, 92 https://tests.stockfishchess.org/tests/view/664b75dd830eb9f886614b44 closes https://github.com/official-stockfish/Stockfish/pull/5277 Bench: 1384337 --- src/evaluate.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 3a24657f..44e69b3f 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -76,8 +76,13 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; nnue -= nnue * (nnueComplexity * 5 / 3) / 32395; - int npm = pos.non_pawn_material() / 64; - v = (nnue * (npm + 943 + 11 * pos.count()) + optimism * (npm + 140)) / 1058; + v = (nnue + * (32961 + 381 * pos.count() + 349 * pos.count() + + 392 * pos.count() + 649 * pos.count() + 1211 * pos.count()) + + optimism + * (4835 + 136 * pos.count() + 375 * pos.count() + + 403 * pos.count() + 628 * pos.count() + 1124 * pos.count())) + / 32768; // Damp down the evaluation linearly when shuffling v = v * (204 - pos.rule50_count()) / 208; From c86ec8ec2916924065138770e0201c2cfe6d3e72 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Tue, 21 May 2024 12:42:34 +0900 Subject: [PATCH 048/315] Remove cutoffCnt margin adjustment in razoring Passed non-regression STC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 65344 W: 16767 L: 16578 D: 31999 Ptnml(0-2): 198, 7557, 16987, 7718, 212 https://tests.stockfishchess.org/tests/view/664bd895830eb9f886615a26 Passed non-regression LTC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 35214 W: 8999 L: 8791 D: 17424 Ptnml(0-2): 16, 3804, 9760, 4010, 17 https://tests.stockfishchess.org/tests/view/664bead5830eb9f886615a52 closes 
https://github.com/official-stockfish/Stockfish/pull/5278 Bench: 1296223 --- src/search.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 2817247d..a152b931 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -762,8 +762,7 @@ Value Search::Worker::search( // Step 7. Razoring (~1 Elo) // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. - // Adjust razor margin according to cutoffCnt. (~1 Elo) - if (eval < alpha - 474 - (326 - 139 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) + if (eval < alpha - 474 - 324 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) From c14b69790a62aad89fcc471cde482923dfe57f1e Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Tue, 21 May 2024 13:55:20 -0400 Subject: [PATCH 049/315] Lower smallnet threshold with updated eval divisors Params found after 30k spsa games at 60+0.6, with initial values from 64k spsa games at 45+0.45 First spsa with 64k / 120k games at 45+0.45: https://tests.stockfishchess.org/tests/view/664a561b5fc7b70b8817c663 https://tests.stockfishchess.org/tests/view/664ae88e830eb9f8866146f9 Second spsa with 30k / 120k games at 60+0.6: https://tests.stockfishchess.org/tests/view/664be227830eb9f886615a36 Values found at 10k games at 60+0.6 also passed STC and LTC: https://tests.stockfishchess.org/tests/view/664bf4bd830eb9f886615a72 https://tests.stockfishchess.org/tests/view/664c0905830eb9f886615abf Passed STC: https://tests.stockfishchess.org/tests/view/664c139e830eb9f886615af2 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 69408 W: 18216 L: 17842 D: 33350 Ptnml(0-2): 257, 8275, 17401, 8379, 392 Passed LTC: https://tests.stockfishchess.org/tests/view/664cdaf7830eb9f886616a24 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 35466 W: 9075 L: 8758 D: 17633 Ptnml(0-2): 27, 3783, 9794, 4104, 25 closes https://github.com/official-stockfish/Stockfish/pull/5280 bench 1301287 --- src/evaluate.cpp | 8 ++++---- 1 
file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 44e69b3f..ca09aaf9 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -46,7 +46,7 @@ int Eval::simple_eval(const Position& pos, Color c) { bool Eval::use_smallnet(const Position& pos) { int simpleEval = simple_eval(pos, pos.side_to_move()); - return std::abs(simpleEval) > 1126 + 6 * pos.count(); + return std::abs(simpleEval) > 1018 + 5 * pos.count(); } // Evaluate is the evaluator for the outer world. It returns a static evaluation @@ -73,8 +73,8 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, } // Blend optimism and eval with nnue complexity and material imbalance - optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; - nnue -= nnue * (nnueComplexity * 5 / 3) / 32395; + optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 620; + nnue -= nnue * (nnueComplexity * 5 / 3) / 32082; v = (nnue * (32961 + 381 * pos.count() + 349 * pos.count() @@ -82,7 +82,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, + optimism * (4835 + 136 * pos.count() + 375 * pos.count() + 403 * pos.count() + 628 * pos.count() + 1124 * pos.count())) - / 32768; + / 36860; // Damp down the evaluation linearly when shuffling v = v * (204 - pos.rule50_count()) / 208; From ed79745bb9e7207b604c62758ea45dd5c597ed8d Mon Sep 17 00:00:00 2001 From: Dubslow Date: Tue, 4 Apr 2023 22:55:52 -0500 Subject: [PATCH 050/315] Improve comments about DEPTH constants Also "fix" movepicker to allow depths between CHECKS and NO_CHECKS, which makes them easier to tweak (not that they get tweaked hardly ever) (This was more beneficial when there was a third stage to DEPTH_QS, but it's still an improvement now) closes https://github.com/official-stockfish/Stockfish/pull/5205 No functional change --- src/movepick.cpp | 4 ++-- src/search.cpp | 31 ++++++++++++++++++------------- src/tt.cpp | 12 ++++++++---- src/tt.h | 5 ++++- src/types.h | 17 
+++++++++++------ 5 files changed, 43 insertions(+), 26 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 4a93662d..7def0ce8 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -361,8 +361,8 @@ top: if (select([]() { return true; })) return *(cur - 1); - // If we did not find any move and we do not try checks, we have finished - if (depth != DEPTH_QS_CHECKS) + // If we found no move and the depth is too low to try checks, then we have finished + if (depth <= DEPTH_QS_NORMAL) return Move::none(); ++stage; diff --git a/src/search.cpp b/src/search.cpp index a152b931..87cfdbc2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -733,7 +733,7 @@ Value Search::Worker::search( ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); // Static evaluation is saved as it was before adjustment by correction history - tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, Move::none(), + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_UNSEARCHED, Move::none(), unadjustedStaticEval, tt.generation()); } @@ -1387,8 +1387,11 @@ moves_loop: // When in check, search starts here } -// Quiescence search function, which is called by the main search -// function with zero depth, or recursively with further decreasing depth per call. +// Quiescence search function, which is called by the main search function with zero depth, or +// recursively with further decreasing depth per call. With depth <= 0, we "should" be using +// static eval only, but tactical moves may confuse the static eval. To fight this horizon effect, +// we implement this qsearch of tactical moves only. 
+// See https://www.chessprogramming.org/Horizon_Effect and https://www.chessprogramming.org/Quiescence_Search // (~155 Elo) template Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { @@ -1446,8 +1449,10 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, assert(0 <= ss->ply && ss->ply < MAX_PLY); - // Decide the replacement and cutoff priority of the qsearch TT entries - ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NO_CHECKS; + // Note that unlike regular search, which stores literal depth, in QS we only store the + // current movegen stage. If in check, we search all evasions and thus store + // DEPTH_QS_CHECKS. (Evasions may be quiet, and _CHECKS includes quiets.) + ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NORMAL; // Step 3. Transposition table lookup posKey = pos.key(); @@ -1499,8 +1504,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && !PvNode) bestValue = (3 * bestValue + beta) / 4; if (!ss->ttHit) - tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_NONE, - Move::none(), unadjustedStaticEval, tt.generation()); + tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, + DEPTH_UNSEARCHED, Move::none(), unadjustedStaticEval, tt.generation()); return bestValue; } @@ -1514,16 +1519,16 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, (ss - 2)->continuationHistory}; - // Initialize a MovePicker object for the current position, and prepare - // to search the moves. Because the depth is <= 0 here, only captures, - // queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS) - // will be generated. 
+ // Initialize a MovePicker object for the current position, and prepare to search the moves. + // We presently use two stages of qs movegen, first captures+checks, then captures only. + // (When in check, we simply search all evasions.) + // (Presently, having the checks stage is worth only 1 Elo, and may be removable in the near future, + // which would result in only a single stage of QS movegen.) Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, &thisThread->pawnHistory); - // Step 5. Loop through all pseudo-legal moves until no moves remain - // or a beta cutoff occurs. + // Step 5. Loop through all pseudo-legal moves until no moves remain or a beta cutoff occurs. while ((move = mp.next_move()) != Move::none()) { assert(move.is_ok()); diff --git a/src/tt.cpp b/src/tt.cpp index cb46fc8a..3f5b9d4d 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -30,6 +30,10 @@ namespace Stockfish { +// DEPTH_ENTRY_OFFSET exists because 1) we use `bool(depth8)` as the occupancy check, but +// 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits": +// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted below.) + // Populates the TTEntry with a new node's data, possibly // overwriting an old position. The update is not atomic and can be racy. 
void TTEntry::save( @@ -40,14 +44,14 @@ void TTEntry::save( move16 = m; // Overwrite less valuable entries (cheapest checks first) - if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_OFFSET + 2 * pv > depth8 - 4 + if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_ENTRY_OFFSET + 2 * pv > depth8 - 4 || relative_age(generation8)) { - assert(d > DEPTH_OFFSET); - assert(d < 256 + DEPTH_OFFSET); + assert(d > DEPTH_ENTRY_OFFSET); + assert(d < 256 + DEPTH_ENTRY_OFFSET); key16 = uint16_t(k); - depth8 = uint8_t(d - DEPTH_OFFSET); + depth8 = uint8_t(d - DEPTH_ENTRY_OFFSET); genBound8 = uint8_t(generation8 | uint8_t(pv) << 2 | b); value16 = int16_t(v); eval16 = int16_t(ev); diff --git a/src/tt.h b/src/tt.h index 554a81a5..7cc876fb 100644 --- a/src/tt.h +++ b/src/tt.h @@ -37,12 +37,15 @@ namespace Stockfish { // move 16 bit // value 16 bit // eval value 16 bit +// +// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially. +// Equally, the store order in save() matches this order. struct TTEntry { Move move() const { return Move(move16); } Value value() const { return Value(value16); } Value eval() const { return Value(eval16); } - Depth depth() const { return Depth(depth8 + DEPTH_OFFSET); } + Depth depth() const { return Depth(depth8 + DEPTH_ENTRY_OFFSET); } bool is_pv() const { return bool(genBound8 & 0x4); } Bound bound() const { return Bound(genBound8 & 0x3); } void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8); diff --git a/src/types.h b/src/types.h index 8b0ffb0c..aa4af012 100644 --- a/src/types.h +++ b/src/types.h @@ -187,12 +187,17 @@ constexpr Value PieceValue[PIECE_NB] = { using Depth = int; enum : int { - DEPTH_QS_CHECKS = 0, - DEPTH_QS_NO_CHECKS = -1, - - DEPTH_NONE = -6, - - DEPTH_OFFSET = -7 // value used only for TT entry occupancy check + // The following DEPTH_ constants are used for TT entries and QS movegen stages. 
In regular search, + // TT depth is literal: the search depth (effort) used to make the corresponding TT value. + // In qsearch, however, TT entries only store the current QS movegen stage (which should thus compare + // lower than any regular search depth). + DEPTH_QS_CHECKS = 0, + DEPTH_QS_NORMAL = -1, + // For TT entries where no searching at all was done (whether regular or qsearch) we use + // _UNSEARCHED, which should thus compare lower than any QS or regular depth. _ENTRY_OFFSET is used + // only for the TT entry occupancy check (see tt.cpp), and should thus be lower than _UNSEARCHED. + DEPTH_UNSEARCHED = -6, + DEPTH_ENTRY_OFFSET = -7 }; // clang-format off From 6db47ed71aac3b1667dd68a08c39bfde0fe0a2ab Mon Sep 17 00:00:00 2001 From: Viren6 <94880762+Viren6@users.noreply.github.com> Date: Sun, 19 May 2024 02:58:01 +0100 Subject: [PATCH 051/315] Addition of new scaling comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is intended to prevent patches like 9b90cd8 and the subsequent reversion e3c9ed7 from happening again. Scaling behaviour of the reduction adjustments in the non-linear scaling section have been proven to >8 sigma: STC: https://tests.stockfishchess.org/tests/view/6647b19f6dcff0d1d6b05d52 Elo: 4.28 ± 0.8 (95%) LOS: 100.0% Total: 200000 W: 52555 L: 50094 D: 97351 Ptnml(0-2): 573, 22628, 51248, 24867, 684 nElo: 8.35 ± 1.5 (95%) PairsRatio: 1.10 VLTC: https://tests.stockfishchess.org/tests/view/6647b1b06dcff0d1d6b05d54 Elo: -1.48 ± 1.0 (95%) LOS: 0.2% Total: 100000 W: 25009 L: 25436 D: 49555 Ptnml(0-2): 11, 10716, 28971, 10293, 9 nElo: -3.23 ± 2.2 (95%) PairsRatio: 0.96 The else if condition is moved to the non scaling section based on: https://tests.stockfishchess.org/tests/view/664567a193ce6da3e93b3232 (It has no proven scaling) General comment improvements and removal of a redundant margin condition have also been included. 
closes https://github.com/official-stockfish/Stockfish/pull/5266 No functional change --- src/search.cpp | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 87cfdbc2..08141818 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -840,7 +840,8 @@ Value Search::Worker::search( if (depth <= 0) return qsearch < PvNode ? PV : NonPV > (pos, ss, alpha, beta); - // For cutNodes without a ttMove, we decrease depth by 2 if depth is high enough. + // For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, or + // by 1 if there is a ttMove with an upper bound. if (cutNode && depth >= 8 && (!ttMove || tte->bound() == BOUND_UPPER)) depth -= 1 + !ttMove; @@ -1042,11 +1043,14 @@ moves_loop: // When in check, search starts here // then that move is singular and should be extended. To verify this we do // a reduced search on the position excluding the ttMove and if the result // is lower than ttValue minus a margin, then we will extend the ttMove. + // Recursive singular search is avoided. // Note: the depth margin and singularBeta margin are known for having non-linear // scaling. Their values are optimized to time controls of 180+1.8 and longer // so changing them requires tests at these types of time controls. - // Recursive singular search is avoided. + // Generally, higher singularBeta (i.e closer to ttValue) and lower extension + // margins scale well. 
+ if (!rootNode && move == ttMove && !excludedMove && depth >= 4 - (thisThread->completedDepth > 35) + ss->ttPv && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) @@ -1063,9 +1067,8 @@ moves_loop: // When in check, search starts here if (value < singularBeta) { int doubleMargin = 298 * PvNode - 209 * !ttCapture; - int tripleMargin = - 117 + 252 * PvNode - 270 * !ttCapture + 111 * (ss->ttPv || !ttCapture); - int quadMargin = 471 + 343 * PvNode - 281 * !ttCapture + 217 * ss->ttPv; + int tripleMargin = 117 + 252 * PvNode - 270 * !ttCapture + 111 * ss->ttPv; + int quadMargin = 471 + 343 * PvNode - 281 * !ttCapture + 217 * ss->ttPv; extension = 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin) @@ -1127,25 +1130,30 @@ moves_loop: // When in check, search starts here thisThread->nodes.fetch_add(1, std::memory_order_relaxed); pos.do_move(move, st, givesCheck); + // These reduction adjustments have proven non-linear scaling. + // They are optimized to time controls of 180 + 1.8 and longer so + // changing them or adding conditions that are similar + // requires tests at these types of time controls. + // Decrease reduction if position is or has been on the PV (~7 Elo) if (ss->ttPv) r -= 1 + (ttValue > alpha) + (tte->depth() >= depth); - else if (cutNode && move != ttMove && move != ss->killers[0]) - r++; + // Decrease reduction for PvNodes (~0 Elo on STC, ~2 Elo on LTC) + if (PvNode) + r--; + + // These reduction adjustments have no proven non-linear scaling. 
// Increase reduction for cut nodes (~4 Elo) if (cutNode) - r += 2 - (tte->depth() >= depth && ss->ttPv); + r += 2 - (tte->depth() >= depth && ss->ttPv) + + (!ss->ttPv && move != ttMove && move != ss->killers[0]); // Increase reduction if ttMove is a capture (~3 Elo) if (ttCapture) r++; - // Decrease reduction for PvNodes (~0 Elo on STC, ~2 Elo on LTC) - if (PvNode) - r--; - // Increase reduction if next ply has a lot of fail high (~5 Elo) if ((ss + 1)->cutoffCnt > 3) r++; From 1dcffa621065f58982feb462671d79404e51e088 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Tue, 21 May 2024 22:50:44 -0400 Subject: [PATCH 052/315] Comment about re-evaluating positions While the smallNet bool is no longer used as of now, setting it to false upon re-evaluation represents the correct eval state. closes https://github.com/official-stockfish/Stockfish/pull/5279 No functional change --- src/evaluate.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index ca09aaf9..4c449774 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -66,6 +66,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, Value nnue = smallNet ? 
networks.small.evaluate(pos, &caches.small, true, &nnueComplexity) : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); + // Re-evaluate the position when higher eval accuracy is worth the time spent if (smallNet && nnue * simpleEval < 0) { nnue = networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); From c39b98b9e356f6d01d323c6e6d5badd50e31c980 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Tue, 21 May 2024 11:54:53 -0700 Subject: [PATCH 053/315] Simplify Away History Updates in Multicut Passed Non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 44896 W: 11600 L: 11388 D: 21908 Ptnml(0-2): 140, 5230, 11532, 5370, 176 https://tests.stockfishchess.org/tests/view/664cee31830eb9f886616a80 Passed Non-regression LTC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 56832 W: 14421 L: 14234 D: 28177 Ptnml(0-2): 37, 6251, 15643, 6458, 27 https://tests.stockfishchess.org/tests/view/664cfd4e830eb9f886616aa6 closes https://github.com/official-stockfish/Stockfish/pull/5281 Bench: 1119412 --- src/search.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 08141818..a98468ec 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1083,12 +1083,7 @@ moves_loop: // When in check, search starts here // we assume this expected cut-node is not singular (multiple moves fail high), // and we can prune the whole subtree by returning a softbound. 
else if (singularBeta >= beta) - { - if (!ttCapture) - update_quiet_histories(pos, ss, *this, ttMove, -stat_malus(depth)); - return singularBeta; - } // Negative extensions // If other moves failed high over (ttValue - margin) without the ttMove on a reduced search, From c6a1e7fd4232ec151206fab16cb7daa23bfd7137 Mon Sep 17 00:00:00 2001 From: cj5716 <125858804+cj5716@users.noreply.github.com> Date: Sun, 19 May 2024 13:15:42 +0800 Subject: [PATCH 054/315] Optimise pairwise multiplication This speedup was first inspired by a comment by @AndyGrant on my recent PR "If mullo_epi16 would preserve the signedness, then this could be used to remove 50% of the max operations during the halfkp-pairwise mat-mul relu deal." That got me thinking, because although mullo_epi16 did not preserve the signedness, mulhi_epi16 did, and so we could shift left and then use mulhi_epi16, instead of shifting right after the mullo. However, due to some issues with shifting into the sign bit, the FT weights and biases had to be multiplied by 2 for the optimisation to work. 
Speedup on "Arch=x86-64-bmi2 COMP=clang", courtesy of @Torom Result of 50 runs base (...es/stockfish) = 962946 +/- 1202 test (...ise-max-less) = 979696 +/- 1084 diff = +16750 +/- 1794 speedup = +0.0174 P(speedup > 0) = 1.0000 CPU: 4 x Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz Hyperthreading: on Also a speedup on "COMP=gcc", courtesy of Torom once again Result of 50 runs base (...tockfish_gcc) = 966033 +/- 1574 test (...max-less_gcc) = 983319 +/- 1513 diff = +17286 +/- 2515 speedup = +0.0179 P(speedup > 0) = 1.0000 CPU: 4 x Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz Hyperthreading: on Passed STC: LLR: 2.96 (-2.94,2.94) <0.00,2.00> Total: 67712 W: 17715 L: 17358 D: 32639 Ptnml(0-2): 225, 7472, 18140, 7759, 260 https://tests.stockfishchess.org/tests/view/664c1d75830eb9f886616906 closes https://github.com/official-stockfish/Stockfish/pull/5282 No functional change --- src/nnue/nnue_feature_transformer.h | 80 +++++++++++++++++++---------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 7b7aada3..483b84a8 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -55,14 +55,14 @@ using psqt_vec_t = __m256i; #define vec_store(a, b) _mm512_store_si512(a, b) #define vec_add_16(a, b) _mm512_add_epi16(a, b) #define vec_sub_16(a, b) _mm512_sub_epi16(a, b) - #define vec_mul_16(a, b) _mm512_mullo_epi16(a, b) + #define vec_mulhi_16(a, b) _mm512_mulhi_epi16(a, b) #define vec_zero() _mm512_setzero_epi32() #define vec_set_16(a) _mm512_set1_epi16(a) #define vec_max_16(a, b) _mm512_max_epi16(a, b) #define vec_min_16(a, b) _mm512_min_epi16(a, b) + #define vec_slli_16(a, b) _mm512_slli_epi16(a, b) // Inverse permuted at load time - #define vec_msb_pack_16(a, b) \ - _mm512_packs_epi16(_mm512_srli_epi16(a, 7), _mm512_srli_epi16(b, 7)) + #define vec_packus_16(a, b) _mm512_packus_epi16(a, b) #define vec_load_psqt(a) _mm256_load_si256(a) #define vec_store_psqt(a, b) 
_mm256_store_si256(a, b) #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) @@ -78,14 +78,14 @@ using psqt_vec_t = __m256i; #define vec_store(a, b) _mm256_store_si256(a, b) #define vec_add_16(a, b) _mm256_add_epi16(a, b) #define vec_sub_16(a, b) _mm256_sub_epi16(a, b) - #define vec_mul_16(a, b) _mm256_mullo_epi16(a, b) + #define vec_mulhi_16(a, b) _mm256_mulhi_epi16(a, b) #define vec_zero() _mm256_setzero_si256() #define vec_set_16(a) _mm256_set1_epi16(a) #define vec_max_16(a, b) _mm256_max_epi16(a, b) #define vec_min_16(a, b) _mm256_min_epi16(a, b) + #define vec_slli_16(a, b) _mm256_slli_epi16(a, b) // Inverse permuted at load time - #define vec_msb_pack_16(a, b) \ - _mm256_packs_epi16(_mm256_srli_epi16(a, 7), _mm256_srli_epi16(b, 7)) + #define vec_packus_16(a, b) _mm256_packus_epi16(a, b) #define vec_load_psqt(a) _mm256_load_si256(a) #define vec_store_psqt(a, b) _mm256_store_si256(a, b) #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) @@ -101,12 +101,13 @@ using psqt_vec_t = __m128i; #define vec_store(a, b) *(a) = (b) #define vec_add_16(a, b) _mm_add_epi16(a, b) #define vec_sub_16(a, b) _mm_sub_epi16(a, b) - #define vec_mul_16(a, b) _mm_mullo_epi16(a, b) + #define vec_mulhi_16(a, b) _mm_mulhi_epi16(a, b) #define vec_zero() _mm_setzero_si128() #define vec_set_16(a) _mm_set1_epi16(a) #define vec_max_16(a, b) _mm_max_epi16(a, b) #define vec_min_16(a, b) _mm_min_epi16(a, b) - #define vec_msb_pack_16(a, b) _mm_packs_epi16(_mm_srli_epi16(a, 7), _mm_srli_epi16(b, 7)) + #define vec_slli_16(a, b) _mm_slli_epi16(a, b) + #define vec_packus_16(a, b) _mm_packus_epi16(a, b) #define vec_load_psqt(a) (*(a)) #define vec_store_psqt(a, b) *(a) = (b) #define vec_add_psqt_32(a, b) _mm_add_epi32(a, b) @@ -122,18 +123,14 @@ using psqt_vec_t = int32x4_t; #define vec_store(a, b) *(a) = (b) #define vec_add_16(a, b) vaddq_s16(a, b) #define vec_sub_16(a, b) vsubq_s16(a, b) - #define vec_mul_16(a, b) vmulq_s16(a, b) + #define vec_mulhi_16(a, b) vqdmulhq_s16(a, b) #define vec_zero() \ 
vec_t { 0 } #define vec_set_16(a) vdupq_n_s16(a) #define vec_max_16(a, b) vmaxq_s16(a, b) #define vec_min_16(a, b) vminq_s16(a, b) -inline vec_t vec_msb_pack_16(vec_t a, vec_t b) { - const int8x8_t shifta = vshrn_n_s16(a, 7); - const int8x8_t shiftb = vshrn_n_s16(b, 7); - const int8x16_t compacted = vcombine_s8(shifta, shiftb); - return *reinterpret_cast(&compacted); -} + #define vec_slli_16(a, b) vshlq_s16(a, vec_set_16(b)) + #define vec_packus_16(a, b) reinterpret_cast(vcombine_u8(vqmovun_s16(a), vqmovun_s16(b))) #define vec_load_psqt(a) (*(a)) #define vec_store_psqt(a, b) *(a) = (b) #define vec_add_psqt_32(a, b) vaddq_s32(a, b) @@ -281,6 +278,19 @@ class FeatureTransformer { #endif } + inline void scale_weights(bool read) const { + for (IndexType j = 0; j < InputDimensions; ++j) + { + WeightType* w = const_cast(&weights[j * HalfDimensions]); + for (IndexType i = 0; i < HalfDimensions; ++i) + w[i] = read ? w[i] * 2 : w[i] / 2; + } + + BiasType* b = const_cast(biases); + for (IndexType i = 0; i < HalfDimensions; ++i) + b[i] = read ? 
b[i] * 2 : b[i] / 2; + } + // Read network parameters bool read_parameters(std::istream& stream) { @@ -289,6 +299,7 @@ class FeatureTransformer { read_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); permute_weights(inverse_order_packs); + scale_weights(true); return !stream.fail(); } @@ -296,12 +307,14 @@ class FeatureTransformer { bool write_parameters(std::ostream& stream) const { permute_weights(order_packs); + scale_weights(false); write_leb_128(stream, biases, HalfDimensions); write_leb_128(stream, weights, HalfDimensions * InputDimensions); write_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); permute_weights(inverse_order_packs); + scale_weights(true); return !stream.fail(); } @@ -332,7 +345,7 @@ class FeatureTransformer { constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; const vec_t Zero = vec_zero(); - const vec_t One = vec_set_16(127); + const vec_t One = vec_set_16(127 * 2); const vec_t* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); const vec_t* in1 = @@ -341,15 +354,30 @@ class FeatureTransformer { for (IndexType j = 0; j < NumOutputChunks; ++j) { - const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero); - const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero); - const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero); - const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero); + // What we want to do is multiply inputs in a pairwise manner (after clipping), and then shift right by 9. + // Instead, we shift left by 7, and use mulhi, stripping the bottom 16 bits, effectively shifting right by 16, + // resulting in a net shift of 9 bits. We use mulhi because it maintains the sign of the multiplication (unlike mullo), + // allowing us to make use of packus to clip 2 of the inputs, resulting in a save of 2 "vec_max_16" calls. 
+ // A special case is when we use NEON, where we shift left by 6 instead, because the instruction "vqdmulhq_s16" + // also doubles the return value after the multiplication, adding an extra shift to the left by 1, so we + // compensate by shifting less before the multiplication. - const vec_t pa = vec_mul_16(sum0a, sum1a); - const vec_t pb = vec_mul_16(sum0b, sum1b); + #if defined(USE_SSE2) + constexpr int shift = 7; + #else + constexpr int shift = 6; + #endif + const vec_t sum0a = + vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero), shift); + const vec_t sum0b = + vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero), shift); + const vec_t sum1a = vec_min_16(in1[j * 2 + 0], One); + const vec_t sum1b = vec_min_16(in1[j * 2 + 1], One); - out[j] = vec_msb_pack_16(pa, pb); + const vec_t pa = vec_mulhi_16(sum0a, sum1a); + const vec_t pb = vec_mulhi_16(sum0b, sum1b); + + out[j] = vec_packus_16(pa, pb); } #else @@ -359,9 +387,9 @@ class FeatureTransformer { BiasType sum0 = accumulation[static_cast(perspectives[p])][j + 0]; BiasType sum1 = accumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; - sum0 = std::clamp(sum0, 0, 127); - sum1 = std::clamp(sum1, 0, 127); - output[offset + j] = static_cast(unsigned(sum0 * sum1) / 128); + sum0 = std::clamp(sum0, 0, 127 * 2); + sum1 = std::clamp(sum1, 0, 127 * 2); + output[offset + j] = static_cast(unsigned(sum0 * sum1) / 512); } #endif From 72a345873d9cf24542dc73cd5a28eba7d23b0d2b Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Wed, 22 May 2024 09:09:04 +0800 Subject: [PATCH 055/315] Revert "Reduce When TTValue is Above Alpha" The patch regressed significantly at longer time controls. In particular, the `depth--` behavior was predicted to scale badly based on data from other variations of the patch. 
Passed VVLTC 1st sprt: https://tests.stockfishchess.org/tests/view/664d45cf830eb9f886616c7d LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 51292 W: 13242 L: 12954 D: 25096 Ptnml(0-2): 5, 4724, 15896, 5020, 1 Passed VVLTC 2nd sprt: https://tests.stockfishchess.org/tests/view/664e641a928b1fb18de4e385 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 41884 W: 10933 L: 10634 D: 20317 Ptnml(0-2): 1, 3759, 13125, 4054, 3 closes https://github.com/official-stockfish/Stockfish/pull/5283 Bench: 1503815 --- src/search.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index a98468ec..47766730 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -833,12 +833,9 @@ Value Search::Worker::search( if (PvNode && !ttMove) depth -= 3; - if (!PvNode && ss->ttHit && (tte->bound() & BOUND_UPPER) && ttValue > alpha + 5 * depth) - depth--; - // Use qsearch if depth <= 0. if (depth <= 0) - return qsearch < PvNode ? PV : NonPV > (pos, ss, alpha, beta); + return qsearch(pos, ss, alpha, beta); // For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, or // by 1 if there is a ttMove with an upper bound. 
From 365aa85dcea3adee21b5e01a7941b4b18fdc8194 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Tue, 21 May 2024 16:24:49 -0400 Subject: [PATCH 056/315] Remove material imbalance param when adjusting optimism Passed non-regression STC: https://tests.stockfishchess.org/tests/view/664d033d830eb9f886616aff LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 102144 W: 26283 L: 26135 D: 49726 Ptnml(0-2): 292, 12201, 25991, 12243, 345 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/664d5c00830eb9f886616cb3 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 250032 W: 63022 L: 63036 D: 123974 Ptnml(0-2): 103, 27941, 68970, 27871, 131 closes https://github.com/official-stockfish/Stockfish/pull/5284 Bench: 1330940 --- src/evaluate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 4c449774..7ca470af 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -73,8 +73,8 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, smallNet = false; } - // Blend optimism and eval with nnue complexity and material imbalance - optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 620; + // Blend optimism and eval with nnue complexity + optimism += optimism * nnueComplexity / 512; nnue -= nnue * (nnueComplexity * 5 / 3) / 32082; v = (nnue From 61acbfc7d310ed6044ba4fc5ef91a6c382d1c9a6 Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Thu, 23 May 2024 08:28:46 +0800 Subject: [PATCH 057/315] VVLTC search tune Parameters were tuned in 2 stages: 1. 127k games at VVLTC: https://tests.stockfishchess.org/tests/view/6649f8dfb8fa20e74c39f52a. 2. 106k games at VVLTC: https://tests.stockfishchess.org/tests/view/664bfb77830eb9f886615a9d. 
Passed VVLTC 1st sprt: https://tests.stockfishchess.org/tests/view/664e8dd9928b1fb18de4e410 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 20466 W: 5340 L: 5093 D: 10033 Ptnml(0-2): 0, 1796, 6397, 2037, 3 Passed VVLTC 2nd sprt: https://tests.stockfishchess.org/tests/view/664eb4aa928b1fb18de4e47d LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 15854 W: 4186 L: 3934 D: 7734 Ptnml(0-2): 1, 1367, 4938, 1621, 0 closes https://github.com/official-stockfish/Stockfish/pull/5286 Bench: 1558110 --- src/search.cpp | 88 +++++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 47766730..563a5710 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -60,9 +60,9 @@ static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { - Value futilityMult = 127 - 48 * noTtCutNode; - Value improvingDeduction = 65 * improving * futilityMult / 32; - Value worseningDeduction = 334 * oppWorsening * futilityMult / 1024; + Value futilityMult = 129 - 43 * noTtCutNode; + Value improvingDeduction = 56 * improving * futilityMult / 32; + Value worseningDeduction = 336 * oppWorsening * futilityMult / 1024; return futilityMult * d - improvingDeduction - worseningDeduction; } @@ -74,15 +74,15 @@ constexpr int futility_move_count(bool improving, Depth depth) { // Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; - v += cv * std::abs(cv) / 6047; + v += cv * std::abs(cv) / 5435; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::clamp(187 * d - 288, 17, 1548); } +int 
stat_bonus(Depth d) { return std::clamp(205 * d - 283, 18, 1544); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return (d < 4 ? 630 * d - 281 : 1741); } +int stat_malus(Depth d) { return (d < 4 ? 767 * d - 275 : 1911); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } @@ -312,12 +312,12 @@ void Search::Worker::iterative_deepening() { // Reset aspiration window starting size Value avg = rootMoves[pvIdx].averageScore; - delta = 10 + avg * avg / 9828; + delta = 9 + avg * avg / 10502; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore (~4 Elo) - optimism[us] = 116 * avg / (std::abs(avg) + 84); + optimism[us] = 122 * avg / (std::abs(avg) + 92); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail @@ -507,7 +507,7 @@ void Search::Worker::clear() { h->fill(-60); for (size_t i = 1; i < reductions.size(); ++i) - reductions[i] = int((21.69 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + reductions[i] = int((19.90 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); refreshTable.clear(networks); } @@ -740,7 +740,7 @@ Value Search::Worker::search( // Use static evaluation difference to improve quiet move ordering (~9 Elo) if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-11 * int((ss - 1)->staticEval + ss->staticEval), -1729, 1517); + int bonus = std::clamp(-11 * int((ss - 1)->staticEval + ss->staticEval), -1592, 1390); bonus = bonus > 0 ? 2 * bonus : bonus / 2; thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) @@ -762,7 +762,7 @@ Value Search::Worker::search( // Step 7. 
Razoring (~1 Elo) // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. - if (eval < alpha - 474 - 324 * depth * depth) + if (eval < alpha - 501 - 305 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) @@ -771,23 +771,23 @@ Value Search::Worker::search( // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. - if (!ss->ttPv && depth < 11 + if (!ss->ttPv && depth < 12 && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - - (ss - 1)->statScore / 252 + - (ss - 1)->statScore / 248 >= beta && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) return beta > VALUE_TB_LOSS_IN_MAX_PLY ? (eval + beta) / 2 : eval; // Step 9. Null move search with verification search (~35 Elo) - if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 15246 - && eval >= beta && ss->staticEval >= beta - 21 * depth + 366 && !excludedMove + if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 13999 + && eval >= beta && ss->staticEval >= beta - 21 * depth + 390 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / 152, 6) + depth / 3 + 5; + Depth R = std::min(int(eval - beta) / 177, 6) + depth / 3 + 5; ss->currentMove = Move::null(); ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -845,7 +845,7 @@ Value Search::Worker::search( // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. 
- probCutBeta = beta + 176 - 65 * improving; + probCutBeta = beta + 185 - 60 * improving; if ( !PvNode && depth > 3 && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY @@ -901,7 +901,7 @@ Value Search::Worker::search( moves_loop: // When in check, search starts here // Step 12. A small Probcut idea, when we are in check (~4 Elo) - probCutBeta = beta + 440; + probCutBeta = beta + 361; if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 4 && ttValue >= probCutBeta && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) @@ -985,15 +985,15 @@ moves_loop: // When in check, search starts here // Futility pruning for captures (~2 Elo) if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { - Value futilityValue = ss->staticEval + 276 + 256 * lmrDepth + Value futilityValue = ss->staticEval + 283 + 235 * lmrDepth + PieceValue[capturedPiece] + captHist / 7; if (futilityValue <= alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - int seeHist = std::clamp(captHist / 32, -177 * depth, 175 * depth); - if (!pos.see_ge(move, -183 * depth - seeHist)) + int seeHist = std::clamp(captHist / 32, -183 * depth, 162 * depth); + if (!pos.see_ge(move, -166 * depth - seeHist)) continue; } else @@ -1004,18 +1004,18 @@ moves_loop: // When in check, search starts here + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) - if (lmrDepth < 6 && history < -4076 * depth) + if (lmrDepth < 6 && history < -4427 * depth) continue; history += 2 * thisThread->mainHistory[us][move.from_to()]; - lmrDepth += history / 4401; + lmrDepth += history / 3670; Value futilityValue = - ss->staticEval + (bestValue < ss->staticEval - 53 ? 151 : 57) + 140 * lmrDepth; + ss->staticEval + (bestValue < ss->staticEval - 51 ? 
149 : 55) + 141 * lmrDepth; // Futility pruning: parent node (~13 Elo) - if (!ss->inCheck && lmrDepth < 10 && futilityValue <= alpha) + if (!ss->inCheck && lmrDepth < 11 && futilityValue <= alpha) { if (bestValue <= futilityValue && std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && futilityValue < VALUE_TB_WIN_IN_MAX_PLY) @@ -1049,11 +1049,11 @@ moves_loop: // When in check, search starts here // margins scale well. if (!rootNode && move == ttMove && !excludedMove - && depth >= 4 - (thisThread->completedDepth > 35) + ss->ttPv + && depth >= 4 - (thisThread->completedDepth > 38) + ss->ttPv && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - (57 + 50 * (ss->ttPv && !PvNode)) * depth / 64; + Value singularBeta = ttValue - (58 + 64 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1063,15 +1063,15 @@ moves_loop: // When in check, search starts here if (value < singularBeta) { - int doubleMargin = 298 * PvNode - 209 * !ttCapture; - int tripleMargin = 117 + 252 * PvNode - 270 * !ttCapture + 111 * ss->ttPv; - int quadMargin = 471 + 343 * PvNode - 281 * !ttCapture + 217 * ss->ttPv; + int doubleMargin = 304 * PvNode - 203 * !ttCapture; + int tripleMargin = 117 + 259 * PvNode - 296 * !ttCapture + 97 * ss->ttPv; + int quadMargin = 486 + 343 * PvNode - 273 * !ttCapture + 232 * ss->ttPv; extension = 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin) + (value < singularBeta - quadMargin); - depth += ((!PvNode) && (depth < 15)); + depth += ((!PvNode) && (depth < 16)); } // Multi-cut pruning @@ -1101,7 +1101,7 @@ moves_loop: // When in check, search starts here else if (PvNode && move == ttMove && move.to_sq() == prevSq && thisThread->captureHistory[movedPiece][move.to_sq()] [type_of(pos.piece_on(move.to_sq()))] - > 3748) + > 3988) extension = 1; } @@ -1157,10 +1157,10 @@ moves_loop: // When in check, search 
starts here ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - 5266; + + (*contHist[1])[movedPiece][move.to_sq()] - 5169; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / (14519 - std::min(depth, 15) * 103); + r -= ss->statScore / (12219 - std::min(depth, 13) * 120); // Step 17. Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1 + rootNode) @@ -1179,7 +1179,7 @@ moves_loop: // When in check, search starts here { // Adjust full-depth search based on LMR results - if the result // was good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = value > (bestValue + 40 + 2 * newDepth); // (~1 Elo) + const bool doDeeperSearch = value > (bestValue + 36 + 2 * newDepth); // (~1 Elo) const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo) newDepth += doDeeperSearch - doShallowerSearch; @@ -1340,9 +1340,9 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (depth > 4) + (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -13241) - + ((ss - 1)->moveCount > 10) + (!ss->inCheck && bestValue <= ss->staticEval - 127) - + (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 74); + int bonus = (depth > 4) + (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -14144) + + ((ss - 1)->moveCount > 9) + (!ss->inCheck && bestValue <= ss->staticEval - 115) + + (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 81); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus); thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] @@ -1513,7 +1513,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, if (bestValue > alpha) alpha = bestValue; - 
futilityBase = ss->staticEval + 264; + futilityBase = ss->staticEval + 279; } const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -1585,11 +1585,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)] [move.to_sq()] - <= 4348) + <= 4181) continue; // Do not search moves with bad enough SEE values (~5 Elo) - if (!pos.see_ge(move, -63)) + if (!pos.see_ge(move, -67)) continue; } @@ -1655,7 +1655,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) { int reductionScale = reductions[d] * reductions[mn]; - return (reductionScale + 1147 - delta * 755 / rootDelta) / 1024 + (!i && reductionScale > 1125); + return (reductionScale + 1222 - delta * 733 / rootDelta) / 1024 + (!i && reductionScale > 1231); } // elapsed() returns the time elapsed since the search started. If the @@ -1758,7 +1758,7 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - int bestMoveBonus = bestValue > beta + 165 ? quietMoveBonus // larger bonus + int bestMoveBonus = bestValue > beta + 176 ? quietMoveBonus // larger bonus : stat_bonus(depth); // smaller bonus update_quiet_stats(pos, ss, workerThread, bestMove, bestMoveBonus); @@ -1796,7 +1796,7 @@ void update_all_stats(const Position& pos, // by moves at ply -1, -2, -3, -4, and -6 with current move. 
void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - bonus = bonus * 45 / 64; + bonus = bonus * 47 / 64; for (int i : {1, 2, 3, 4, 6}) { From 4d876275cf127b9e7cf91cef984deafa2abb47d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Thu, 23 May 2024 22:03:43 +0200 Subject: [PATCH 058/315] Simplify material weights in evaluation This patch uses the same material weights for the nnue amplification term and the optimism term in evaluate(). STC: LLR: 2.99 (-2.94,2.94) <-1.75,0.25> Total: 83360 W: 21489 L: 21313 D: 40558 Ptnml(0-2): 303, 9934, 21056, 10058, 329 https://tests.stockfishchess.org/tests/view/664eee69928b1fb18de500d9 LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 192648 W: 48675 L: 48630 D: 95343 Ptnml(0-2): 82, 21484, 53161, 21501, 96 https://tests.stockfishchess.org/tests/view/664fa17aa86388d5e27d7d6e closes https://github.com/official-stockfish/Stockfish/pull/5287 Bench: 1495602 --- src/evaluate.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 7ca470af..75fe0f92 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -77,13 +77,10 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, optimism += optimism * nnueComplexity / 512; nnue -= nnue * (nnueComplexity * 5 / 3) / 32082; - v = (nnue - * (32961 + 381 * pos.count() + 349 * pos.count() - + 392 * pos.count() + 649 * pos.count() + 1211 * pos.count()) - + optimism - * (4835 + 136 * pos.count() + 375 * pos.count() - + 403 * pos.count() + 628 * pos.count() + 1124 * pos.count())) - / 36860; + int material = 200 * pos.count() + 350 * pos.count() + 400 * pos.count() + + 640 * pos.count() + 1200 * pos.count(); + + v = (nnue * (34000 + material) + optimism * (4400 + material)) / 36860; // Damp down the evaluation linearly when shuffling v = v * (204 - pos.rule50_count()) / 208; From 8bc3fd3871aaa2437105bdc141d5ac25a88ea885 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Fri, 24 
May 2024 10:58:13 -0400 Subject: [PATCH 059/315] Lower smallnet threshold with tuned eval params The smallnet threshold is now below the training data range of the current smallnet (simple eval diff > 1k, nn-baff1edelf90.nnue) when no pawns are on the board. Params found with spsa at 93k / 120k games at 60+06: https://tests.stockfishchess.org/tests/view/664fa166a86388d5e27d7d6b Tuned on top of: https://github.com/official-stockfish/Stockfish/pull/5287 Passed STC: https://tests.stockfishchess.org/tests/view/664fc8b7a86388d5e27d8dac LLR: 2.96 (-2.94,2.94) <0.00,2.00> Total: 64672 W: 16731 L: 16371 D: 31570 Ptnml(0-2): 239, 7463, 16517, 7933, 184 Passed LTC: https://tests.stockfishchess.org/tests/view/664fd5f9a86388d5e27d8dfe LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 210648 W: 53489 L: 52813 D: 104346 Ptnml(0-2): 102, 23129, 58164, 23849, 80 closes https://github.com/official-stockfish/Stockfish/pull/5288 Bench: 1717838 --- src/evaluate.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 75fe0f92..13a3f211 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -46,7 +46,7 @@ int Eval::simple_eval(const Position& pos, Color c) { bool Eval::use_smallnet(const Position& pos) { int simpleEval = simple_eval(pos, pos.side_to_move()); - return std::abs(simpleEval) > 1018 + 5 * pos.count(); + return std::abs(simpleEval) > 992 + 6 * pos.count(); } // Evaluate is the evaluator for the outer world. 
It returns a static evaluation @@ -74,13 +74,15 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, } // Blend optimism and eval with nnue complexity - optimism += optimism * nnueComplexity / 512; - nnue -= nnue * (nnueComplexity * 5 / 3) / 32082; + optimism += optimism * nnueComplexity / 470; + nnue -= nnue * (nnueComplexity * 5 / 3) / 32621; int material = 200 * pos.count() + 350 * pos.count() + 400 * pos.count() + 640 * pos.count() + 1200 * pos.count(); - v = (nnue * (34000 + material) + optimism * (4400 + material)) / 36860; + v = (nnue * (34000 + material + 135 * pos.count()) + + optimism * (4400 + material + 99 * pos.count())) + / 35967; // Damp down the evaluation linearly when shuffling v = v * (204 - pos.rule50_count()) / 208; From 8e1f273c7d10e2b49c07cdc16b09a3d4574acf4c Mon Sep 17 00:00:00 2001 From: "Shahin M. Shahin" <41402573+peregrineshahin@users.noreply.github.com> Date: Fri, 24 May 2024 01:19:16 +0300 Subject: [PATCH 060/315] Remove rootDelta branch This makes rootDelta logic easier to understand, recalculating the value where it belongs so removes an unnecessary branch. Passed non-regression STC: https://tests.stockfishchess.org/tests/view/664fc147a86388d5e27d8d8e LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 206016 W: 53120 L: 53089 D: 99807 Ptnml(0-2): 591, 20928, 59888, 21061, 540 closes https://github.com/official-stockfish/Stockfish/pull/5289 No functional change --- src/search.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 563a5710..ed264f55 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -330,6 +330,7 @@ void Search::Worker::iterative_deepening() { // for every four searchAgain steps (see issue #2717). Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4); + rootDelta = beta - alpha; bestValue = search(rootPos, ss, alpha, beta, adjustedDepth, false); // Bring the best move to the front. 
It is critical that sorting @@ -590,8 +591,6 @@ Value Search::Worker::search( if (alpha >= beta) return alpha; } - else - thisThread->rootDelta = beta - alpha; assert(0 <= ss->ply && ss->ply < MAX_PLY); From 5e98a4e43dd1c2698162bc3f848a0a98943f86c6 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Fri, 24 May 2024 22:46:03 -0700 Subject: [PATCH 061/315] Simplify Away TT Cutoff Return Value Adjustments Passed Non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 198432 W: 51161 L: 51119 D: 96152 Ptnml(0-2): 772, 23670, 50273, 23746, 755 https://tests.stockfishchess.org/tests/view/66517b9ea86388d5e27da966 Passed Non-regression LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 234150 W: 59200 L: 59197 D: 115753 Ptnml(0-2): 126, 26200, 64404, 26235, 110 https://tests.stockfishchess.org/tests/view/6653a84da86388d5e27daa63 closes https://github.com/official-stockfish/Stockfish/pull/5292 bench 1555200 --- src/search.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index ed264f55..d253601d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -637,9 +637,7 @@ Value Search::Worker::search( // Partial workaround for the graph history interaction problem // For high rule50 counts don't produce transposition table cutoffs. if (pos.rule50_count() < 90) - return ttValue >= beta && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY - ? (ttValue * 3 + beta) / 4 - : ttValue; + return ttValue; } // Step 5. Tablebases probe From d0b9411b8275369074bb0de041257db2bccc6430 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Tue, 28 May 2024 13:49:30 +0300 Subject: [PATCH 062/315] Tweak return value in futility pruning Tweak the return value formula in futility pruning. 
Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 60544 W: 15791 L: 15440 D: 29313 Ptnml(0-2): 193, 7024, 15520, 7309, 226 https://tests.stockfishchess.org/tests/view/6654ef22a86388d5e27db122 Passed LTC: LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 126426 W: 32317 L: 31812 D: 62297 Ptnml(0-2): 55, 13871, 34869, 14350, 68 https://tests.stockfishchess.org/tests/view/66550644a86388d5e27db649 closes https://github.com/official-stockfish/Stockfish/pull/5295 bench: 1856147 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index d253601d..0dbc6a3a 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -773,7 +773,7 @@ Value Search::Worker::search( - (ss - 1)->statScore / 248 >= beta && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) - return beta > VALUE_TB_LOSS_IN_MAX_PLY ? (eval + beta) / 2 : eval; + return beta > VALUE_TB_LOSS_IN_MAX_PLY ? beta + (eval - beta) / 3 : eval; // Step 9. Null move search with verification search (~35 Elo) if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 13999 From b0287dcb1c436887075962b596cf2068d2ca9ba8 Mon Sep 17 00:00:00 2001 From: Disservin Date: Tue, 28 May 2024 18:00:22 +0200 Subject: [PATCH 063/315] apply const to prefetch parameter closes https://github.com/official-stockfish/Stockfish/pull/5296 No functional change --- src/misc.cpp | 6 +++--- src/misc.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index 1abb81b1..58f80420 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -415,14 +415,14 @@ void start_logger(const std::string& fname) { Logger::start(fname); } #ifdef NO_PREFETCH -void prefetch(void*) {} +void prefetch(const void*) {} #else -void prefetch(void* addr) { +void prefetch(const void* addr) { #if defined(_MSC_VER) - _mm_prefetch((char*) addr, _MM_HINT_T0); + _mm_prefetch((char const*) addr, _MM_HINT_T0); #else __builtin_prefetch(addr); #endif diff --git 
a/src/misc.h b/src/misc.h index d75b236f..3a905dfa 100644 --- a/src/misc.h +++ b/src/misc.h @@ -40,7 +40,7 @@ std::string compiler_info(); // Preloads the given address in L1/L2 cache. This is a non-blocking // function that doesn't stall the CPU waiting for data to be loaded from memory, // which can be quite slow. -void prefetch(void* addr); +void prefetch(const void* addr); void start_logger(const std::string& fname); void* std_aligned_alloc(size_t alignment, size_t size); From a169c78b6d3b082068deb49a39aaa1fd75464c7f Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Fri, 17 May 2024 12:10:31 +0200 Subject: [PATCH 064/315] Improve performance on NUMA systems Allow for NUMA memory replication for NNUE weights. Bind threads to ensure execution on a specific NUMA node. This patch introduces NUMA memory replication, currently only utilized for the NNUE weights. Along with it comes all machinery required to identify NUMA nodes and bind threads to specific processors/nodes. It also comes with small changes to Thread and ThreadPool to allow easier execution of custom functions on the designated thread. Old thread binding (WinProcGroup) machinery is removed because it's incompatible with this patch. Small changes to unrelated parts of the code were made to ensure correctness, like some classes being made unmovable, raw pointers replaced with unique_ptr. etc. Windows 7 and Windows 10 is partially supported. Windows 11 is fully supported. Linux is fully supported, with explicit exclusion of Android. No additional dependencies. ----------------- A new UCI option `NumaPolicy` is introduced. 
It can take the following values: ``` system - gathers NUMA node information from the system (lscpu or windows api), binds each thread to a single NUMA node none - assumes there is 1 NUMA node, never binds threads auto - this is the default value, depends on the number of set threads and NUMA nodes, will only enable binding on multinode systems and when the number of threads reaches a threshold (dependent on node size and count) [[custom]] - // ':'-separated numa nodes // ','-separated cpu indices // supports "first-last" range syntax for cpu indices, for example '0-15,32-47:16-31,48-63' ``` Setting `NumaPolicy` forces recreation of the threads in the ThreadPool, which in turn forces the recreation of the TT. The threads are distributed among NUMA nodes in a round-robin fashion based on fill percentage (i.e. it will strive to fill all NUMA nodes evenly). Threads are bound to NUMA nodes, not specific processors, because that's our only requirement and the OS can schedule them better. Special care is taken that maximum memory usage on systems that do not require memory replication stays as before, that is, unnecessary copies are avoided. On linux the process' processor affinity is respected. This means that if you for example use taskset to restrict Stockfish to a single NUMA node then the `system` and `auto` settings will only see a single NUMA node (more precisely, the processors included in the current affinity mask) and act accordingly. ----------------- We can't ensure that a memory allocation takes place on a given NUMA node without using libnuma on linux, or using appropriate custom allocators on windows (https://learn.microsoft.com/en-us/windows/win32/memory/allocating-memory-from-a-numa-node), so to avoid complications the current implementation relies on first-touch policy. Due to this we also rely on the memory allocator to give us a new chunk of untouched memory from the system. This appears to work reliably on linux, but results may vary.
MacOS is not supported, because AFAIK it's not affected, and implementation would be problematic anyway. Windows is supported since Windows 7 (https://learn.microsoft.com/en-us/windows/win32/api/processtopologyapi/nf-processtopologyapi-setthreadgroupaffinity). Until Windows 11/Server 2022 NUMA nodes are split such that they cannot span processor groups. This is because before Windows 11/Server 2022 it's not possible to set thread affinity spanning processor groups. The splitting is done manually in some cases (required after Windows 10 Build 20348). Since Windows 11/Server 2022 we can set affinities spanning processor groups, so this splitting is not done and the behaviour is pretty much like on linux. Linux is supported, **without** libnuma requirement. `lscpu` is expected. ----------------- Passed 60+1 @ 256t 16000MB hash: https://tests.stockfishchess.org/tests/view/6654e443a86388d5e27db0d8 ``` LLR: 2.95 (-2.94,2.94) <0.00,10.00> Total: 278 W: 110 L: 29 D: 139 Ptnml(0-2): 0, 1, 56, 82, 0 ``` Passed SMP STC: https://tests.stockfishchess.org/tests/view/6654fc74a86388d5e27db1cd ``` LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 67152 W: 17354 L: 17177 D: 32621 Ptnml(0-2): 64, 7428, 18408, 7619, 57 ``` Passed STC: https://tests.stockfishchess.org/tests/view/6654fb27a86388d5e27db15c ``` LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 131648 W: 34155 L: 34045 D: 63448 Ptnml(0-2): 426, 13878, 37096, 14008, 416 ``` fixes #5253 closes https://github.com/official-stockfish/Stockfish/pull/5285 No functional change --- .github/ci/libcxx17.imp | 1 + src/Makefile | 2 +- src/engine.cpp | 88 +++- src/engine.h | 31 +- src/misc.cpp | 134 +----- src/misc.h | 56 ++- src/nnue/network.cpp | 42 ++ src/nnue/network.h | 6 + src/numa.h | 904 ++++++++++++++++++++++++++++++++++++++++ src/search.cpp | 41 +- src/search.h | 37 +- src/thread.cpp | 192 ++++++--- src/thread.h | 91 +++- src/tt.cpp | 29 +- src/tt.h | 5 +- src/uci.cpp | 42 +- src/uci.h | 3 + src/ucioption.cpp | 2 + src/ucioption.h | 1 + 19
files changed, 1418 insertions(+), 289 deletions(-) create mode 100644 src/numa.h diff --git a/.github/ci/libcxx17.imp b/.github/ci/libcxx17.imp index 7bdcf5bc..d3a262b5 100644 --- a/.github/ci/libcxx17.imp +++ b/.github/ci/libcxx17.imp @@ -7,6 +7,7 @@ { include: [ "<__fwd/sstream.h>", private, "", public ] }, { include: [ "<__fwd/streambuf.h>", private, "", public ] }, { include: [ "<__fwd/string_view.h>", private, "", public ] }, + { include: [ "<__system_error/errc.h>", private, "", public ] }, # Mappings for includes between public headers { include: [ "", public, "", public ] }, diff --git a/src/Makefile b/src/Makefile index 45f38b01..5119b615 100644 --- a/src/Makefile +++ b/src/Makefile @@ -63,7 +63,7 @@ HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \ nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \ nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \ search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \ - tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h + tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h OBJS = $(notdir $(SRCS:.cpp=.o)) diff --git a/src/engine.cpp b/src/engine.cpp index e8da24aa..3fc27223 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -18,15 +18,15 @@ #include "engine.h" +#include #include +#include #include #include +#include #include #include #include -#include -#include -#include #include "evaluate.h" #include "misc.h" @@ -48,10 +48,14 @@ constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - Engine::Engine(std::string path) : binaryDirectory(CommandLine::get_binary_directory(path)), + numaContext(NumaConfig::from_system()), states(new std::deque(1)), - networks(NN::Networks( - NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG), - NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) { + threads(), + 
networks( + numaContext, + NN::Networks( + NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG), + NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) { pos.set(StartFEN, false, &states->back()); capSq = SQ_NONE; } @@ -74,7 +78,7 @@ void Engine::stop() { threads.stop = true; } void Engine::search_clear() { wait_for_search_finished(); - tt.clear(options["Threads"]); + tt.clear(threads); threads.clear(); // @TODO wont work with multiple instances @@ -124,11 +128,35 @@ void Engine::set_position(const std::string& fen, const std::vector // modifiers -void Engine::resize_threads() { threads.set({options, threads, tt, networks}, updateContext); } +void Engine::set_numa_config_from_option(const std::string& o) { + if (o == "auto" || o == "system") + { + numaContext.set_numa_config(NumaConfig::from_system()); + } + else if (o == "none") + { + numaContext.set_numa_config(NumaConfig{}); + } + else + { + numaContext.set_numa_config(NumaConfig::from_string(o)); + } + + // Force reallocation of threads in case affinities need to change. 
+ resize_threads(); +} + +void Engine::resize_threads() { + threads.wait_for_search_finished(); + threads.set(numaContext.get_numa_config(), {options, threads, tt, networks}, updateContext); + + // Reallocate the hash with the new threadpool size + set_tt_size(options["Hash"]); +} void Engine::set_tt_size(size_t mb) { wait_for_search_finished(); - tt.resize(mb, options["Threads"]); + tt.resize(mb, threads); } void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; } @@ -136,28 +164,35 @@ void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; } // network related void Engine::verify_networks() const { - networks.big.verify(options["EvalFile"]); - networks.small.verify(options["EvalFileSmall"]); + networks->big.verify(options["EvalFile"]); + networks->small.verify(options["EvalFileSmall"]); } void Engine::load_networks() { - load_big_network(options["EvalFile"]); - load_small_network(options["EvalFileSmall"]); + networks.modify_and_replicate([this](NN::Networks& networks_) { + networks_.big.load(binaryDirectory, options["EvalFile"]); + networks_.small.load(binaryDirectory, options["EvalFileSmall"]); + }); + threads.clear(); } void Engine::load_big_network(const std::string& file) { - networks.big.load(binaryDirectory, file); + networks.modify_and_replicate( + [this, &file](NN::Networks& networks_) { networks_.big.load(binaryDirectory, file); }); threads.clear(); } void Engine::load_small_network(const std::string& file) { - networks.small.load(binaryDirectory, file); + networks.modify_and_replicate( + [this, &file](NN::Networks& networks_) { networks_.small.load(binaryDirectory, file); }); threads.clear(); } void Engine::save_network(const std::pair, std::string> files[2]) { - networks.big.save(files[0].first); - networks.small.save(files[1].first); + networks.modify_and_replicate([&files](NN::Networks& networks_) { + networks_.big.save(files[0].first); + networks_.small.save(files[1].first); + }); } // utility functions @@ -169,7 
+204,7 @@ void Engine::trace_eval() const { verify_networks(); - sync_cout << "\n" << Eval::trace(p, networks) << sync_endl; + sync_cout << "\n" << Eval::trace(p, *networks) << sync_endl; } OptionsMap& Engine::get_options() { return options; } @@ -184,4 +219,21 @@ std::string Engine::visualize() const { return ss.str(); } +std::vector> Engine::get_bound_thread_count_by_numa_node() const { + auto counts = threads.get_bound_thread_count_by_numa_node(); + const NumaConfig& cfg = numaContext.get_numa_config(); + std::vector> ratios; + NumaIndex n = 0; + for (; n < counts.size(); ++n) + ratios.emplace_back(counts[n], cfg.num_cpus_in_numa_node(n)); + if (!counts.empty()) + for (; n < cfg.num_numa_nodes(); ++n) + ratios.emplace_back(0, cfg.num_cpus_in_numa_node(n)); + return ratios; +} + +std::string Engine::get_numa_config_as_string() const { + return numaContext.get_numa_config().to_string(); +} + } diff --git a/src/engine.h b/src/engine.h index 64a814cb..91a8a96b 100644 --- a/src/engine.h +++ b/src/engine.h @@ -35,6 +35,7 @@ #include "thread.h" #include "tt.h" #include "ucioption.h" +#include "numa.h" namespace Stockfish { @@ -47,6 +48,13 @@ class Engine { using InfoIter = Search::InfoIteration; Engine(std::string path = ""); + + // Can't be movable due to components holding backreferences to fields + Engine(const Engine&) = delete; + Engine(Engine&&) = delete; + Engine& operator=(const Engine&) = delete; + Engine& operator=(Engine&&) = delete; + ~Engine() { wait_for_search_finished(); } std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960); @@ -63,6 +71,7 @@ class Engine { // modifiers + void set_numa_config_from_option(const std::string& o); void resize_threads(); void set_tt_size(size_t mb); void set_ponderhit(bool); @@ -83,23 +92,27 @@ class Engine { // utility functions - void trace_eval() const; - OptionsMap& get_options(); - std::string fen() const; - void flip(); - std::string visualize() const; + void trace_eval() const; + OptionsMap& 
get_options(); + std::string fen() const; + void flip(); + std::string visualize() const; + std::vector> get_bound_thread_count_by_numa_node() const; + std::string get_numa_config_as_string() const; private: const std::string binaryDirectory; + NumaReplicationContext numaContext; + Position pos; StateListPtr states; Square capSq; - OptionsMap options; - ThreadPool threads; - TranspositionTable tt; - Eval::NNUE::Networks networks; + OptionsMap options; + ThreadPool threads; + TranspositionTable tt; + NumaReplicated networks; Search::SearchManager::UpdateContext updateContext; }; diff --git a/src/misc.cpp b/src/misc.cpp index 58f80420..d48b75e1 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -48,6 +48,7 @@ using fun8_t = bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGE #endif #include +#include #include #include #include @@ -56,6 +57,7 @@ using fun8_t = bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGE #include #include #include +#include #include "types.h" @@ -592,129 +594,6 @@ void aligned_large_pages_free(void* mem) { std_aligned_free(mem); } #endif -namespace WinProcGroup { - -#ifndef _WIN32 - -void bind_this_thread(size_t) {} - -#else - -namespace { -// Retrieves logical processor information using Windows-specific -// API and returns the best node id for the thread with index idx. Original -// code from Texel by Peter Österlund. -int best_node(size_t idx) { - - int threads = 0; - int nodes = 0; - int cores = 0; - DWORD returnLength = 0; - DWORD byteOffset = 0; - - // Early exit if the needed API is not available at runtime - HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto fun1 = (fun1_t) (void (*)()) GetProcAddress(k32, "GetLogicalProcessorInformationEx"); - if (!fun1) - return -1; - - // First call to GetLogicalProcessorInformationEx() to get returnLength. - // We expect the call to fail due to null buffer. 
- if (fun1(RelationAll, nullptr, &returnLength)) - return -1; - - // Once we know returnLength, allocate the buffer - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; - ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) malloc(returnLength); - - // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed - if (!fun1(RelationAll, buffer, &returnLength)) - { - free(buffer); - return -1; - } - - while (byteOffset < returnLength) - { - if (ptr->Relationship == RelationNumaNode) - nodes++; - - else if (ptr->Relationship == RelationProcessorCore) - { - cores++; - threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1; - } - - assert(ptr->Size); - byteOffset += ptr->Size; - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) (((char*) ptr) + ptr->Size); - } - - free(buffer); - - std::vector groups; - - // Run as many threads as possible on the same node until the core limit is - // reached, then move on to filling the next node. - for (int n = 0; n < nodes; n++) - for (int i = 0; i < cores / nodes; i++) - groups.push_back(n); - - // In case a core has more than one logical processor (we assume 2) and we - // still have threads to allocate, spread them evenly across available nodes. - for (int t = 0; t < threads - cores; t++) - groups.push_back(t % nodes); - - // If we still have more threads than the total number of logical processors - // then return -1 and let the OS to decide what to do. - return idx < groups.size() ? 
groups[idx] : -1; -} -} - - -// Sets the group affinity of the current thread -void bind_this_thread(size_t idx) { - - // Use only local variables to be thread-safe - int node = best_node(idx); - - if (node == -1) - return; - - // Early exit if the needed API are not available at runtime - HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto fun2 = fun2_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMaskEx")); - auto fun3 = fun3_t((void (*)()) GetProcAddress(k32, "SetThreadGroupAffinity")); - auto fun4 = fun4_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMask2")); - auto fun5 = fun5_t((void (*)()) GetProcAddress(k32, "GetMaximumProcessorGroupCount")); - - if (!fun2 || !fun3) - return; - - if (!fun4 || !fun5) - { - GROUP_AFFINITY affinity; - if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx - fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity - } - else - { - // If a numa node has more than one processor group, we assume they are - // sized equal and we spread threads evenly across the groups. 
- USHORT elements, returnedElements; - elements = fun5(); // GetMaximumProcessorGroupCount - GROUP_AFFINITY* affinity = (GROUP_AFFINITY*) malloc(elements * sizeof(GROUP_AFFINITY)); - if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2 - fun3(GetCurrentThread(), &affinity[idx % returnedElements], - nullptr); // SetThreadGroupAffinity - free(affinity); - } -} - -#endif - -} // namespace WinProcGroup - #ifdef _WIN32 #include #define GETCWD _getcwd @@ -723,6 +602,15 @@ void bind_this_thread(size_t idx) { #define GETCWD getcwd #endif +size_t str_to_size_t(const std::string& s) { + size_t value; + auto result = std::from_chars(s.data(), s.data() + s.size(), value); + + if (result.ec != std::errc()) + std::exit(EXIT_FAILURE); + + return value; +} std::string CommandLine::get_binary_directory(std::string argv0) { std::string pathSeparator; diff --git a/src/misc.h b/src/misc.h index 3a905dfa..99cbecfd 100644 --- a/src/misc.h +++ b/src/misc.h @@ -24,10 +24,12 @@ #include #include #include +#include #include #include #include #include +#include #define stringify2(x) #x #define stringify(x) stringify2(x) @@ -50,6 +52,8 @@ void* aligned_large_pages_alloc(size_t size); // nop if mem == nullptr void aligned_large_pages_free(void* mem); +size_t str_to_size_t(const std::string& s); + // Deleter for automating release of memory area template struct AlignedDeleter { @@ -73,6 +77,31 @@ using AlignedPtr = std::unique_ptr>; template using LargePagePtr = std::unique_ptr>; +struct PipeDeleter { + void operator()(FILE* file) const { + if (file != nullptr) + { + pclose(file); + } + } +}; + +#if defined(__linux__) + +inline std::optional get_system_command_output(const std::string& command) { + std::unique_ptr pipe(popen(command.c_str(), "r")); + if (!pipe) + return std::nullopt; + + std::string result; + char buffer[1024]; + while (fgets(buffer, sizeof(buffer), pipe.get()) != nullptr) + result += buffer; + + return result; +} + +#endif void dbg_hit_on(bool 
cond, int slot = 0); void dbg_mean_of(int64_t value, int slot = 0); @@ -88,6 +117,24 @@ inline TimePoint now() { .count(); } +inline std::vector split(const std::string& s, const std::string& delimiter) { + size_t begin = 0; + std::vector res; + + for (;;) + { + const size_t end = s.find(delimiter, begin); + if (end == std::string::npos) + break; + + res.emplace_back(s.substr(begin, end - begin)); + begin = end + delimiter.size(); + } + + res.emplace_back(s.substr(begin)); + + return res; +} enum SyncCout { IO_LOCK, @@ -194,15 +241,6 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) { #endif } -// Under Windows it is not possible for a process to run on more than one -// logical processor group. This usually means being limited to using max 64 -// cores. To overcome this, some special platform-specific API should be -// called to set group affinity for each thread. Original code from Texel by -// Peter Österlund. -namespace WinProcGroup { -void bind_this_thread(size_t idx); -} - struct CommandLine { public: diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp index de2c7eca..db864fcd 100644 --- a/src/nnue/network.cpp +++ b/src/nnue/network.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -123,6 +124,47 @@ bool write_parameters(std::ostream& stream, const T& reference) { } // namespace Detail +template +Network::Network(const Network& other) : + evalFile(other.evalFile), + embeddedType(other.embeddedType) { + if (other.featureTransformer) + { + Detail::initialize(featureTransformer); + *featureTransformer = *other.featureTransformer; + } + for (std::size_t i = 0; i < LayerStacks; ++i) + { + if (other.network[i]) + { + Detail::initialize(network[i]); + *(network[i]) = *(other.network[i]); + } + } +} + +template +Network& +Network::operator=(const Network& other) { + evalFile = other.evalFile; + embeddedType = other.embeddedType; + + if (other.featureTransformer) + { + Detail::initialize(featureTransformer); + 
*featureTransformer = *other.featureTransformer; + } + for (std::size_t i = 0; i < LayerStacks; ++i) + { + if (other.network[i]) + { + Detail::initialize(network[i]); + *(network[i]) = *(other.network[i]); + } + } + + return *this; +} template void Network::load(const std::string& rootDirectory, std::string evalfilePath) { diff --git a/src/nnue/network.h b/src/nnue/network.h index 23f56663..f0ccfafc 100644 --- a/src/nnue/network.h +++ b/src/nnue/network.h @@ -50,6 +50,12 @@ class Network { evalFile(file), embeddedType(type) {} + Network(const Network& other); + Network(Network&& other) = default; + + Network& operator=(const Network& other); + Network& operator=(Network&& other) = default; + void load(const std::string& rootDirectory, std::string evalfilePath); bool save(const std::optional& filename) const; diff --git a/src/numa.h b/src/numa.h new file mode 100644 index 00000000..c04292da --- /dev/null +++ b/src/numa.h @@ -0,0 +1,904 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#ifndef NUMA_H_INCLUDED +#define NUMA_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// We support linux very well, but we explicitly do NOT support Android, partially because +// there are potential issues with `lscpu`, `popen` availability, and partially because +// there's no NUMA environments running Android and there probably won't be. +#if defined(__linux__) && !defined(__ANDROID__) + #if !defined(_GNU_SOURCE) + #define _GNU_SOURCE + #endif + #include +#elif defined(_WIN32) + +// On Windows each processor group can have up to 64 processors. +// https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups +static constexpr size_t WIN_PROCESSOR_GROUP_SIZE = 64; + + #if !defined(NOMINMAX) + #define NOMINMAX + #endif + #include + +// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadselectedcpusetmasks +using SetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT); + +// https://learn.microsoft.com/en-us/windows/win32/api/processtopologyapi/nf-processtopologyapi-setthreadgroupaffinity +using SetThreadGroupAffinity_t = BOOL (*)(HANDLE, const GROUP_AFFINITY*, PGROUP_AFFINITY); + +#endif + +#include "misc.h" + +namespace Stockfish { + +using CpuIndex = size_t; +using NumaIndex = size_t; + +inline const CpuIndex SYSTEM_THREADS_NB = + std::max(1, std::thread::hardware_concurrency()); + +// We want to abstract the purpose of storing the numa node index somewhat. +// Whoever is using this does not need to know the specifics of the replication +// machinery to be able to access NUMA replicated memory. 
+class NumaReplicatedAccessToken { + public: + NumaReplicatedAccessToken() : + n(0) {} + + explicit NumaReplicatedAccessToken(NumaIndex idx) : + n(idx) {} + + NumaIndex get_numa_index() const { return n; } + + private: + NumaIndex n; +}; + +// Designed as immutable, because there is no good reason to alter an already existing config +// in a way that doesn't require recreating it completely, and it would be complex and expensive +// to maintain class invariants. +// The CPU (processor) numbers always correspond to the actual numbering used by the system. +// NOTE: the numbering is only valid within the process, as for example on Windows +// every process gets a "virtualized" set of processors that respects the current affinity +// The NUMA node numbers MAY NOT correspond to the system's numbering of the NUMA nodes. +// In particular, empty nodes may be removed, or the user may create custom nodes. +// It is guaranteed that NUMA nodes are NOT empty, i.e. every node exposed by NumaConfig +// has at least one processor assigned. +// +// Until Stockfish doesn't support exceptions all places where an exception should be thrown +// are replaced by std::exit. +class NumaConfig { + public: + NumaConfig() : + highestCpuIndex(0), + customAffinity(false) { + const auto numCpus = SYSTEM_THREADS_NB; + add_cpu_range_to_node(NumaIndex{0}, CpuIndex{0}, numCpus - 1); + } + + static std::set get_process_affinity() { + std::set cpus; + + // For unsupported systems, or in case of a soft error, we may assume all processors + // are available for use. + [[maybe_unused]] auto set_to_all_cpus = [&]() { + for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) + cpus.insert(c); + }; + +#if defined(__linux__) && !defined(__ANDROID__) + + // cpu_set_t by default holds 1024 entries. This may not be enough soon, + // but there is no easy way to determine how many threads there actually is. + // In this case we just choose a reasonable upper bound. 
+ static constexpr CpuIndex MaxNumCpus = 1024 * 64; + + cpu_set_t* mask = CPU_ALLOC(MaxNumCpus); + if (mask == nullptr) + std::exit(EXIT_FAILURE); + + const size_t masksize = CPU_ALLOC_SIZE(MaxNumCpus); + + CPU_ZERO_S(masksize, mask); + + const int status = sched_getaffinity(0, masksize, mask); + + if (status != 0) + { + CPU_FREE(mask); + std::exit(EXIT_FAILURE); + } + + for (CpuIndex c = 0; c < MaxNumCpus; ++c) + if (CPU_ISSET_S(c, masksize, mask)) + cpus.insert(c); + + CPU_FREE(mask); + +#elif defined(_WIN32) + + // Windows is problematic and weird due to multiple ways of setting affinity, processor groups, + // and behaviour changes between versions. It's unclear if we can support this feature + // on Windows in the same way we do on Linux. + // Apparently when affinity is set via either start /affinity or msys2 taskset + // the function GetNumaProcessorNodeEx completely disregards the processors that we do not + // have affinity more. Moreover, the indices are shifted to start from 0, indicating that Windows + // is providing a whole new mapping of processors to this process. This is problematic in some cases + // but it at least allows us to [probably] support this affinity restriction feature by default. + // So overall, Windows appears to "virtualize" a set of processors and processor groups for every + // process. It's unclear if this assignment can change while the process is running. + // std::thread::hardware_concurrency() returns the number of processors that's consistent + // with GetNumaProcessorNodeEx, so we can just add all of them. + + set_to_all_cpus(); + +#else + + // For other systems we assume the process is allowed to execute on all processors. + set_to_all_cpus(); + +#endif + + return cpus; + } + + // This function queries the system for the mapping of processors to NUMA nodes. + // On Linux we utilize `lscpu` to avoid libnuma. 
+ // On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see + // comment for Windows implementation of get_process_affinity + static NumaConfig from_system(bool respectProcessAffinity = true) { + NumaConfig cfg = empty(); + + std::set allowedCpus; + + if (respectProcessAffinity) + allowedCpus = get_process_affinity(); + else + { + for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) + allowedCpus.insert(c); + } + + auto is_cpu_allowed = [&](CpuIndex c) { return allowedCpus.count(c) == 1; }; + +#if defined(__linux__) && !defined(__ANDROID__) + + // On Linux things are straightforward, since there's no processor groups and + // any thread can be scheduled on all processors. + // This command produces output in the following form + // CPU NODE + // 0 0 + // 1 0 + // 2 1 + // 3 1 + // + // On some systems it may use '-' to signify no NUMA node, in which case we assume it's in node 0. + auto lscpuOpt = get_system_command_output("lscpu -e=cpu,node"); + if (lscpuOpt.has_value()) + { + + std::istringstream ss(*lscpuOpt); + + // skip the list header + ss.ignore(std::numeric_limits::max(), '\n'); + + while (true) + { + CpuIndex c; + NumaIndex n; + + ss >> c; + + if (!ss) + break; + + ss >> n; + + if (!ss) + { + ss.clear(); + std::string dummy; + ss >> dummy; + n = 0; + } + + if (is_cpu_allowed(c)) + cfg.add_cpu_to_node(n, c); + } + } + else + { + for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) + if (is_cpu_allowed(c)) + cfg.add_cpu_to_node(NumaIndex{0}, c); + } + +#elif defined(_WIN32) + + // Since Windows 11 and Windows Server 2022 thread affinities can span + // processor groups and can be set as such by a new WinAPI function. 
+ static const bool CanAffinitySpanProcessorGroups = []() { + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t( + (void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks")); + return SetThreadSelectedCpuSetMasks_f != nullptr; + }(); + + WORD numProcGroups = GetActiveProcessorGroupCount(); + for (WORD procGroup = 0; procGroup < numProcGroups; ++procGroup) + { + for (BYTE number = 0; number < WIN_PROCESSOR_GROUP_SIZE; ++number) + { + PROCESSOR_NUMBER procnum; + procnum.Group = procGroup; + procnum.Number = number; + procnum.Reserved = 0; + USHORT nodeNumber; + + // When start /affinity or taskset was used to run this process with restricted affinity + // GetNumaProcessorNodeEx will NOT correspond to the system's processor setup, instead + // it appears to follow a completely new processor assignment, made specifically for this process, + // in which processors that this process has affinity for are remapped, and only those are remapped, + // to form a new set of processors. In other words, we can only get processors + // which we have affinity for this way. This means that the behaviour for + // `respectProcessAffinity == false` may be unexpected when affinity is set from outside, + // while the behaviour for `respectProcessAffinity == true` is given by default. + const BOOL status = GetNumaProcessorNodeEx(&procnum, &nodeNumber); + const CpuIndex c = static_cast(procGroup) * WIN_PROCESSOR_GROUP_SIZE + + static_cast(number); + if (status != 0 && nodeNumber != std::numeric_limits::max() + && is_cpu_allowed(c)) + { + cfg.add_cpu_to_node(nodeNumber, c); + } + } + } + + // Split the NUMA nodes to be contained within a group if necessary. + // This is needed between Windows 10 Build 20348 and Windows 11, because + // the new NUMA allocation behaviour was introduced while there was + // still no way to set thread affinity spanning multiple processor groups. 
+ // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support + if (!CanAffinitySpanProcessorGroups) + { + NumaConfig splitCfg = empty(); + + NumaIndex splitNodeIndex = 0; + for (const auto& cpus : cfg.nodes) + { + if (cpus.empty()) + continue; + + size_t lastProcGroupIndex = *(cpus.begin()) / WIN_PROCESSOR_GROUP_SIZE; + for (CpuIndex c : cpus) + { + const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; + if (procGroupIndex != lastProcGroupIndex) + { + splitNodeIndex += 1; + lastProcGroupIndex = procGroupIndex; + } + splitCfg.add_cpu_to_node(splitNodeIndex, c); + } + splitNodeIndex += 1; + } + + cfg = std::move(splitCfg); + } + +#else + + // Fallback for unsupported systems. + for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) + if (is_cpu_allowed(c)) + cfg.add_cpu_to_node(NumaIndex{0}, c); + +#endif + + // We have to ensure no empty NUMA nodes persist. + cfg.remove_empty_numa_nodes(); + + return cfg; + } + + // ':'-separated numa nodes + // ','-separated cpu indices + // supports "first-last" range syntax for cpu indices + // For example "0-15,128-143:16-31,144-159:32-47,160-175:48-63,176-191" + static NumaConfig from_string(const std::string& s) { + NumaConfig cfg = empty(); + + NumaIndex n = 0; + for (auto&& nodeStr : split(s, ":")) + { + bool addedAnyCpuInThisNode = false; + + for (const std::string& cpuStr : split(nodeStr, ",")) + { + if (cpuStr.empty()) + continue; + + auto parts = split(cpuStr, "-"); + if (parts.size() == 1) + { + const CpuIndex c = CpuIndex{str_to_size_t(parts[0])}; + if (!cfg.add_cpu_to_node(n, c)) + std::exit(EXIT_FAILURE); + } + else if (parts.size() == 2) + { + const CpuIndex cfirst = CpuIndex{str_to_size_t(parts[0])}; + const CpuIndex clast = CpuIndex{str_to_size_t(parts[1])}; + + if (!cfg.add_cpu_range_to_node(n, cfirst, clast)) + std::exit(EXIT_FAILURE); + } + else + { + std::exit(EXIT_FAILURE); + } + + addedAnyCpuInThisNode = true; + } + + if (addedAnyCpuInThisNode) + n += 1; + } + + cfg.customAffinity = true; 
+ + return cfg; + } + + NumaConfig(const NumaConfig&) = delete; + NumaConfig(NumaConfig&&) = default; + NumaConfig& operator=(const NumaConfig&) = delete; + NumaConfig& operator=(NumaConfig&&) = default; + + bool is_cpu_assigned(CpuIndex n) const { return nodeByCpu.count(n) == 1; } + + NumaIndex num_numa_nodes() const { return nodes.size(); } + + CpuIndex num_cpus_in_numa_node(NumaIndex n) const { + assert(n < nodes.size()); + return nodes[n].size(); + } + + CpuIndex num_cpus() const { return nodeByCpu.size(); } + + bool requires_memory_replication() const { return customAffinity || nodes.size() > 1; } + + std::string to_string() const { + std::string str; + + bool isFirstNode = true; + for (auto&& cpus : nodes) + { + if (!isFirstNode) + str += ":"; + + bool isFirstSet = true; + auto rangeStart = cpus.begin(); + for (auto it = cpus.begin(); it != cpus.end(); ++it) + { + auto next = std::next(it); + if (next == cpus.end() || *next != *it + 1) + { + // cpus[i] is at the end of the range (may be of size 1) + if (!isFirstSet) + str += ","; + + const CpuIndex last = *it; + + if (it != rangeStart) + { + const CpuIndex first = *rangeStart; + + str += std::to_string(first); + str += "-"; + str += std::to_string(last); + } + else + str += std::to_string(last); + + rangeStart = next; + isFirstSet = false; + } + } + + isFirstNode = false; + } + + return str; + } + + bool suggests_binding_threads(CpuIndex numThreads) const { + // If we can reasonably determine that the threads can't be contained + // by the OS within the first NUMA node then we advise distributing + // and binding threads. When the threads are not bound we can only use + // NUMA memory replicated objects from the first node, so when the OS + // has to schedule on other nodes we lose performance. + // We also suggest binding if there's enough threads to distribute among nodes + // with minimal disparity. + // We try to ignore small nodes, in particular the empty ones. 
+ + // If the affinity set by the user does not match the affinity given by the OS + // then binding is necessary to ensure the threads are running on correct processors. + if (customAffinity) + return true; + + // We obviously can't distribute a single thread, so a single thread should never be bound. + if (numThreads <= 1) + return false; + + size_t largestNodeSize = 0; + for (auto&& cpus : nodes) + if (cpus.size() > largestNodeSize) + largestNodeSize = cpus.size(); + + auto is_node_small = [largestNodeSize](const std::set& node) { + static constexpr double SmallNodeThreshold = 0.6; + return static_cast(node.size()) / static_cast(largestNodeSize) + <= SmallNodeThreshold; + }; + + size_t numNotSmallNodes = 0; + for (auto&& cpus : nodes) + if (!is_node_small(cpus)) + numNotSmallNodes += 1; + + return (numThreads > largestNodeSize / 2 || numThreads >= numNotSmallNodes * 4) + && nodes.size() > 1; + } + + std::vector distribute_threads_among_numa_nodes(CpuIndex numThreads) const { + std::vector ns; + + if (nodes.size() == 1) + { + // special case for when there's no NUMA nodes + // doesn't buy us much, but let's keep the default path simple + ns.resize(numThreads, NumaIndex{0}); + } + else + { + std::vector occupation(nodes.size(), 0); + for (CpuIndex c = 0; c < numThreads; ++c) + { + NumaIndex bestNode{0}; + float bestNodeFill = std::numeric_limits::max(); + for (NumaIndex n = 0; n < nodes.size(); ++n) + { + float fill = + static_cast(occupation[n] + 1) / static_cast(nodes[n].size()); + // NOTE: Do we want to perhaps fill the first available node up to 50% first before considering other nodes? + // Probably not, because it would interfere with running multiple instances. We basically shouldn't + // favor any particular node. 
+ if (fill < bestNodeFill) + { + bestNode = n; + bestNodeFill = fill; + } + } + ns.emplace_back(bestNode); + occupation[bestNode] += 1; + } + } + + return ns; + } + + NumaReplicatedAccessToken bind_current_thread_to_numa_node(NumaIndex n) const { + if (n >= nodes.size() || nodes[n].size() == 0) + std::exit(EXIT_FAILURE); + +#if defined(__linux__) && !defined(__ANDROID__) + + cpu_set_t* mask = CPU_ALLOC(highestCpuIndex + 1); + if (mask == nullptr) + std::exit(EXIT_FAILURE); + + const size_t masksize = CPU_ALLOC_SIZE(highestCpuIndex + 1); + + CPU_ZERO_S(masksize, mask); + + for (CpuIndex c : nodes[n]) + CPU_SET_S(c, masksize, mask); + + const int status = sched_setaffinity(0, masksize, mask); + + CPU_FREE(mask); + + if (status != 0) + std::exit(EXIT_FAILURE); + + // We yield this thread just to be sure it gets rescheduled. + // This is defensive, allowed because this code is not performance critical. + sched_yield(); + +#elif defined(_WIN32) + + // Requires Windows 11. No good way to set thread affinity spanning processor groups before that. + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t( + (void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks")); + auto SetThreadGroupAffinity_f = + SetThreadGroupAffinity_t((void (*)()) GetProcAddress(k32, "SetThreadGroupAffinity")); + + if (SetThreadSelectedCpuSetMasks_f != nullptr) + { + // Only available on Windows 11 and Windows Server 2022 onwards. 
+ const USHORT numProcGroups = + ((highestCpuIndex + 1) + WIN_PROCESSOR_GROUP_SIZE - 1) / WIN_PROCESSOR_GROUP_SIZE; + auto groupAffinities = std::make_unique(numProcGroups); + std::memset(groupAffinities.get(), 0, sizeof(GROUP_AFFINITY) * numProcGroups); + for (WORD i = 0; i < numProcGroups; ++i) + groupAffinities[i].Group = i; + + for (CpuIndex c : nodes[n]) + { + const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; + const size_t idxWithinProcGroup = c % WIN_PROCESSOR_GROUP_SIZE; + groupAffinities[procGroupIndex].Mask |= KAFFINITY(1) << idxWithinProcGroup; + } + + HANDLE hThread = GetCurrentThread(); + + const BOOL status = + SetThreadSelectedCpuSetMasks_f(hThread, groupAffinities.get(), numProcGroups); + if (status == 0) + std::exit(EXIT_FAILURE); + + // We yield this thread just to be sure it gets rescheduled. + // This is defensive, allowed because this code is not performance critical. + SwitchToThread(); + } + else if (SetThreadGroupAffinity_f != nullptr) + { + // On earlier windows version (since windows 7) we can't run a single thread + // on multiple processor groups, so we need to restrict the group. + // We assume the group of the first processor listed for this node. + // Processors from outside this group will not be assigned for this thread. + // Normally this won't be an issue because windows used to assign NUMA nodes + // such that they can't span processor groups. However, since Windows 10 Build 20348 + // the behaviour changed, so there's a small window of versions between this and Windows 11 + // that might exhibit problems with not all processors being utilized. + // We handle this in NumaConfig::from_system by manually splitting the nodes when + // we detect that there's no function to set affinity spanning processor nodes. + // This is required because otherwise our thread distribution code may produce + // suboptimal results. 
+ // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support + GROUP_AFFINITY affinity; + std::memset(&affinity, 0, sizeof(GROUP_AFFINITY)); + affinity.Group = static_cast(n); + // We use an ordered set so we're guaranteed to get the smallest cpu number here. + const size_t forcedProcGroupIndex = *(nodes[n].begin()) / WIN_PROCESSOR_GROUP_SIZE; + for (CpuIndex c : nodes[n]) + { + const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; + const size_t idxWithinProcGroup = c % WIN_PROCESSOR_GROUP_SIZE; + // We skip processors that are not in the same proccessor group. + // If everything was set up correctly this will never be an issue, + // but we have to account for bad NUMA node specification. + if (procGroupIndex != forcedProcGroupIndex) + continue; + + affinity.Mask |= KAFFINITY(1) << idxWithinProcGroup; + } + + HANDLE hThread = GetCurrentThread(); + + const BOOL status = SetThreadGroupAffinity_f(hThread, &affinity, nullptr); + if (status == 0) + std::exit(EXIT_FAILURE); + + // We yield this thread just to be sure it gets rescheduled. + // This is defensive, allowed because this code is not performance critical. + SwitchToThread(); + } + +#endif + + return NumaReplicatedAccessToken(n); + } + + template + void execute_on_numa_node(NumaIndex n, FuncT&& f) const { + std::thread th([this, &f, n]() { + bind_current_thread_to_numa_node(n); + std::forward(f)(); + }); + + th.join(); + } + + private: + std::vector> nodes; + std::map nodeByCpu; + CpuIndex highestCpuIndex; + + bool customAffinity; + + static NumaConfig empty() { return NumaConfig(EmptyNodeTag{}); } + + struct EmptyNodeTag {}; + + NumaConfig(EmptyNodeTag) : + highestCpuIndex(0), + customAffinity(false) {} + + void remove_empty_numa_nodes() { + std::vector> newNodes; + for (auto&& cpus : nodes) + if (!cpus.empty()) + newNodes.emplace_back(std::move(cpus)); + nodes = std::move(newNodes); + } + + // Returns true if successful + // Returns false if failed, i.e. 
when the cpu is already present + // strong guarantee, the structure remains unmodified + bool add_cpu_to_node(NumaIndex n, CpuIndex c) { + if (is_cpu_assigned(c)) + return false; + + while (nodes.size() <= n) + nodes.emplace_back(); + + nodes[n].insert(c); + nodeByCpu[c] = n; + + if (c > highestCpuIndex) + highestCpuIndex = c; + + return true; + } + + // Returns true if successful + // Returns false if failed, i.e. when any of the cpus is already present + // strong guarantee, the structure remains unmodified + bool add_cpu_range_to_node(NumaIndex n, CpuIndex cfirst, CpuIndex clast) { + for (CpuIndex c = cfirst; c <= clast; ++c) + if (is_cpu_assigned(c)) + return false; + + while (nodes.size() <= n) + nodes.emplace_back(); + + for (CpuIndex c = cfirst; c <= clast; ++c) + { + nodes[n].insert(c); + nodeByCpu[c] = n; + } + + if (clast > highestCpuIndex) + highestCpuIndex = clast; + + return true; + } +}; + +class NumaReplicationContext; + +// Instances of this class are tracked by the NumaReplicationContext instance +// NumaReplicationContext informs all tracked instances whenever NUMA configuration changes. +class NumaReplicatedBase { + public: + NumaReplicatedBase(NumaReplicationContext& ctx); + + NumaReplicatedBase(const NumaReplicatedBase&) = delete; + NumaReplicatedBase(NumaReplicatedBase&& other) noexcept; + + NumaReplicatedBase& operator=(const NumaReplicatedBase&) = delete; + NumaReplicatedBase& operator=(NumaReplicatedBase&& other) noexcept; + + virtual void on_numa_config_changed() = 0; + virtual ~NumaReplicatedBase(); + + const NumaConfig& get_numa_config() const; + + private: + NumaReplicationContext* context; +}; + +// We force boxing with a unique_ptr. If this becomes an issue due to added indirection we +// may need to add an option for a custom boxing type. +// When the NUMA config changes the value stored at the index 0 is replicated to other nodes. 
+template +class NumaReplicated: public NumaReplicatedBase { + public: + using ReplicatorFuncType = std::function; + + NumaReplicated(NumaReplicationContext& ctx) : + NumaReplicatedBase(ctx) { + replicate_from(T{}); + } + + NumaReplicated(NumaReplicationContext& ctx, T&& source) : + NumaReplicatedBase(ctx) { + replicate_from(std::move(source)); + } + + NumaReplicated(const NumaReplicated&) = delete; + NumaReplicated(NumaReplicated&& other) noexcept : + NumaReplicatedBase(std::move(other)), + instances(std::exchange(other.instances, {})) {} + + NumaReplicated& operator=(const NumaReplicated&) = delete; + NumaReplicated& operator=(NumaReplicated&& other) noexcept { + NumaReplicatedBase::operator=(*this, std::move(other)); + instances = std::exchange(other.instances, {}); + + return *this; + } + + NumaReplicated& operator=(T&& source) { + replicate_from(std::move(source)); + + return *this; + } + + ~NumaReplicated() override = default; + + const T& operator[](NumaReplicatedAccessToken token) const { + assert(token.get_numa_index() < instances.size()); + return *(instances[token.get_numa_index()]); + } + + const T& operator*() const { return *(instances[0]); } + + const T* operator->() const { return instances[0].get(); } + + template + void modify_and_replicate(FuncT&& f) { + auto source = std::move(instances[0]); + std::forward(f)(*source); + replicate_from(std::move(*source)); + } + + void on_numa_config_changed() override { + // Use the first one as the source. It doesn't matter which one we use, because they all must + // be identical, but the first one is guaranteed to exist. 
+ auto source = std::move(instances[0]); + replicate_from(std::move(*source)); + } + + private: + std::vector> instances; + + void replicate_from(T&& source) { + instances.clear(); + + const NumaConfig& cfg = get_numa_config(); + if (cfg.requires_memory_replication()) + { + for (NumaIndex n = 0; n < cfg.num_numa_nodes(); ++n) + { + cfg.execute_on_numa_node( + n, [this, &source]() { instances.emplace_back(std::make_unique(source)); }); + } + } + else + { + assert(cfg.num_numa_nodes() == 1); + // We take advantage of the fact that replication is not required + // and reuse the source value, avoiding one copy operation. + instances.emplace_back(std::make_unique(std::move(source))); + } + } +}; + +class NumaReplicationContext { + public: + NumaReplicationContext(NumaConfig&& cfg) : + config(std::move(cfg)) {} + + NumaReplicationContext(const NumaReplicationContext&) = delete; + NumaReplicationContext(NumaReplicationContext&&) = delete; + + NumaReplicationContext& operator=(const NumaReplicationContext&) = delete; + NumaReplicationContext& operator=(NumaReplicationContext&&) = delete; + + ~NumaReplicationContext() { + // The context must outlive replicated objects + if (!trackedReplicatedObjects.empty()) + std::exit(EXIT_FAILURE); + } + + void attach(NumaReplicatedBase* obj) { + assert(trackedReplicatedObjects.count(obj) == 0); + trackedReplicatedObjects.insert(obj); + } + + void detach(NumaReplicatedBase* obj) { + assert(trackedReplicatedObjects.count(obj) == 1); + trackedReplicatedObjects.erase(obj); + } + + // oldObj may be invalid at this point + void move_attached([[maybe_unused]] NumaReplicatedBase* oldObj, NumaReplicatedBase* newObj) { + assert(trackedReplicatedObjects.count(oldObj) == 1); + assert(trackedReplicatedObjects.count(newObj) == 0); + trackedReplicatedObjects.erase(oldObj); + trackedReplicatedObjects.insert(newObj); + } + + void set_numa_config(NumaConfig&& cfg) { + config = std::move(cfg); + for (auto&& obj : trackedReplicatedObjects) + 
obj->on_numa_config_changed(); + } + + const NumaConfig& get_numa_config() const { return config; } + + private: + NumaConfig config; + + // std::set uses std::less by default, which is required for pointer comparison to be defined. + std::set trackedReplicatedObjects; +}; + +inline NumaReplicatedBase::NumaReplicatedBase(NumaReplicationContext& ctx) : + context(&ctx) { + context->attach(this); +} + +inline NumaReplicatedBase::NumaReplicatedBase(NumaReplicatedBase&& other) noexcept : + context(std::exchange(other.context, nullptr)) { + context->move_attached(&other, this); +} + +inline NumaReplicatedBase& NumaReplicatedBase::operator=(NumaReplicatedBase&& other) noexcept { + context = std::exchange(other.context, nullptr); + + context->move_attached(&other, this); + + return *this; +} + +inline NumaReplicatedBase::~NumaReplicatedBase() { + if (context != nullptr) + context->detach(this); +} + +inline const NumaConfig& NumaReplicatedBase::get_numa_config() const { + return context->get_numa_config(); +} + +} // namespace Stockfish + + +#endif // #ifndef NUMA_H_INCLUDED diff --git a/src/search.cpp b/src/search.cpp index 0dbc6a3a..c074e342 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -137,15 +137,17 @@ void update_all_stats(const Position& pos, Search::Worker::Worker(SharedState& sharedState, std::unique_ptr sm, - size_t thread_id) : + size_t thread_id, + NumaReplicatedAccessToken token) : // Unpack the SharedState struct into member variables thread_idx(thread_id), + numaAccessToken(token), manager(std::move(sm)), options(sharedState.options), threads(sharedState.threads), tt(sharedState.tt), networks(sharedState.networks), - refreshTable(networks) { + refreshTable(networks[token]) { clear(); } @@ -428,7 +430,7 @@ void Search::Worker::iterative_deepening() { skill.pick_best(rootMoves, multiPV); // Use part of the gained time from a previous stable move for the current move - for (Thread* th : threads) + for (auto&& th : threads) { totBestMoveChanges += 
th->worker->bestMoveChanges; th->worker->bestMoveChanges = 0; @@ -510,7 +512,7 @@ void Search::Worker::clear() { for (size_t i = 1; i < reductions.size(); ++i) reductions[i] = int((19.90 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); - refreshTable.clear(networks); + refreshTable.clear(networks[numaAccessToken]); } @@ -576,9 +578,9 @@ Value Search::Worker::search( // Step 2. Check for aborted search and immediate draw if (threads.stop.load(std::memory_order_relaxed) || pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) - return (ss->ply >= MAX_PLY && !ss->inCheck) - ? evaluate(networks, pos, refreshTable, thisThread->optimism[us]) - : value_draw(thisThread->nodes); + return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate( + networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]) + : value_draw(thisThread->nodes); // Step 3. Mate distance pruning. Even if we mate at the next move our score // would be at best mate_in(ss->ply + 1), but if alpha is already bigger because @@ -706,7 +708,7 @@ Value Search::Worker::search( { // Providing the hint that this node's accumulator will be used often // brings significant Elo gain (~13 Elo). 
- Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable); + Eval::NNUE::hint_common_parent_position(pos, networks[numaAccessToken], refreshTable); unadjustedStaticEval = eval = ss->staticEval; } else if (ss->ttHit) @@ -714,9 +716,10 @@ Value Search::Worker::search( // Never assume anything about values stored in TT unadjustedStaticEval = tte->eval(); if (unadjustedStaticEval == VALUE_NONE) - unadjustedStaticEval = evaluate(networks, pos, refreshTable, thisThread->optimism[us]); + unadjustedStaticEval = + evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]); else if (PvNode) - Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable); + Eval::NNUE::hint_common_parent_position(pos, networks[numaAccessToken], refreshTable); ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); @@ -726,7 +729,8 @@ Value Search::Worker::search( } else { - unadjustedStaticEval = evaluate(networks, pos, refreshTable, thisThread->optimism[us]); + unadjustedStaticEval = + evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]); ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); // Static evaluation is saved as it was before adjustment by correction history @@ -892,7 +896,7 @@ Value Search::Worker::search( } } - Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable); + Eval::NNUE::hint_common_parent_position(pos, networks[numaAccessToken], refreshTable); } moves_loop: // When in check, search starts here @@ -1441,7 +1445,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, // Step 2. Check for an immediate draw or maximum ply reached if (pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) return (ss->ply >= MAX_PLY && !ss->inCheck) - ? evaluate(networks, pos, refreshTable, thisThread->optimism[us]) + ? 
evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]) : VALUE_DRAW; assert(0 <= ss->ply && ss->ply < MAX_PLY); @@ -1476,7 +1480,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, unadjustedStaticEval = tte->eval(); if (unadjustedStaticEval == VALUE_NONE) unadjustedStaticEval = - evaluate(networks, pos, refreshTable, thisThread->optimism[us]); + evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]); ss->staticEval = bestValue = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); @@ -1488,10 +1492,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, else { // In case of null move search, use previous static eval with a different sign - unadjustedStaticEval = (ss - 1)->currentMove != Move::null() - ? evaluate(networks, pos, refreshTable, thisThread->optimism[us]) - : -(ss - 1)->staticEval; - ss->staticEval = bestValue = + unadjustedStaticEval = + (ss - 1)->currentMove != Move::null() + ? evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]) + : -(ss - 1)->staticEval; + ss->staticEval = bestValue = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); } diff --git a/src/search.h b/src/search.h index 6e5b22bd..a61f253c 100644 --- a/src/search.h +++ b/src/search.h @@ -32,19 +32,17 @@ #include "misc.h" #include "movepick.h" +#include "nnue/network.h" +#include "nnue/nnue_accumulator.h" +#include "numa.h" #include "position.h" #include "score.h" #include "syzygy/tbprobe.h" #include "timeman.h" #include "types.h" -#include "nnue/nnue_accumulator.h" namespace Stockfish { -namespace Eval::NNUE { -struct Networks; -} - // Different node types, used as a template parameter enum NodeType { NonPV, @@ -133,19 +131,19 @@ struct LimitsType { // The UCI stores the uci options, thread pool, and transposition table. // This struct is used to easily forward data to the Search::Worker class. 
struct SharedState { - SharedState(const OptionsMap& optionsMap, - ThreadPool& threadPool, - TranspositionTable& transpositionTable, - const Eval::NNUE::Networks& nets) : + SharedState(const OptionsMap& optionsMap, + ThreadPool& threadPool, + TranspositionTable& transpositionTable, + const NumaReplicated& nets) : options(optionsMap), threads(threadPool), tt(transpositionTable), networks(nets) {} - const OptionsMap& options; - ThreadPool& threads; - TranspositionTable& tt; - const Eval::NNUE::Networks& networks; + const OptionsMap& options; + ThreadPool& threads; + TranspositionTable& tt; + const NumaReplicated& networks; }; class Worker; @@ -236,7 +234,7 @@ class NullSearchManager: public ISearchManager { // of the search history, and storing data required for the search. class Worker { public: - Worker(SharedState&, std::unique_ptr, size_t); + Worker(SharedState&, std::unique_ptr, size_t, NumaReplicatedAccessToken); // Called at instantiation to initialize Reductions tables // Reset histories, usually before a new game @@ -293,7 +291,8 @@ class Worker { Depth rootDepth, completedDepth; Value rootDelta; - size_t thread_idx; + size_t thread_idx; + NumaReplicatedAccessToken numaAccessToken; // Reductions lookup table initialized at startup std::array reductions; // [depth or moveNumber] @@ -303,10 +302,10 @@ class Worker { Tablebases::Config tbConfig; - const OptionsMap& options; - ThreadPool& threads; - TranspositionTable& tt; - const Eval::NNUE::Networks& networks; + const OptionsMap& options; + ThreadPool& threads; + TranspositionTable& tt; + const NumaReplicated& networks; // Used by NNUE Eval::NNUE::AccumulatorCaches refreshTable; diff --git a/src/thread.cpp b/src/thread.cpp index 8724cb49..5893f4b6 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -22,19 +22,17 @@ #include #include #include +#include #include #include -#include -#include "misc.h" #include "movegen.h" #include "search.h" #include "syzygy/tbprobe.h" #include "timeman.h" -#include "tt.h" 
#include "types.h" -#include "ucioption.h" #include "uci.h" +#include "ucioption.h" namespace Stockfish { @@ -42,13 +40,24 @@ namespace Stockfish { // in idle_loop(). Note that 'searching' and 'exit' should be already set. Thread::Thread(Search::SharedState& sharedState, std::unique_ptr sm, - size_t n) : - worker(std::make_unique(sharedState, std::move(sm), n)), + size_t n, + OptionalThreadToNumaNodeBinder binder) : idx(n), nthreads(sharedState.options["Threads"]), stdThread(&Thread::idle_loop, this) { wait_for_search_finished(); + + run_custom_job([this, &binder, &sharedState, &sm, n]() { + // Use the binder to [maybe] bind the threads to a NUMA node before doing + // the Worker allocation. + // Ideally we would also allocate the SearchManager here, but that's minor. + this->numaAccessToken = binder(); + this->worker = + std::make_unique(sharedState, std::move(sm), n, this->numaAccessToken); + }); + + wait_for_search_finished(); } @@ -66,12 +75,15 @@ Thread::~Thread() { // Wakes up the thread that will start the search void Thread::start_searching() { - mutex.lock(); - searching = true; - mutex.unlock(); // Unlock before notifying saves a few CPU-cycles - cv.notify_one(); // Wake up the thread in idle_loop() + assert(worker != nullptr); + run_custom_job([this]() { worker->start_searching(); }); } +// Wakes up the thread so that it clears its worker's state +void Thread::clear_worker() { + assert(worker != nullptr); + run_custom_job([this]() { worker->clear(); }); +} // Blocks on the condition variable // until the thread has finished searching. @@ -81,20 +93,20 @@ void Thread::wait_for_search_finished() { cv.wait(lk, [&] { return !searching; }); } +void Thread::run_custom_job(std::function f) { + { + std::unique_lock lk(mutex); + cv.wait(lk, [&] { return !searching; }); + jobFunc = std::move(f); + searching = true; + } + cv.notify_one(); +} // Thread gets parked here, blocked on the // condition variable, when it has no work to do.
void Thread::idle_loop() { - - // If OS already scheduled us on a different group than 0 then don't overwrite - // the choice, eventually we are one of many one-threaded processes running on - // some Windows NUMA hardware, for instance in fishtest. To make it simple, - // just check if running threads are below a threshold, in this case, all this - // NUMA machinery is not needed. - if (nthreads > 8) - WinProcGroup::bind_this_thread(idx); - while (true) { std::unique_lock lk(mutex); @@ -105,9 +117,13 @@ void Thread::idle_loop() { if (exit) return; + std::function job = std::move(jobFunc); + jobFunc = nullptr; + lk.unlock(); - worker->start_searching(); + if (job) + job(); } } @@ -121,49 +137,82 @@ uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits // Creates/destroys threads to match the requested number. // Created and launched threads will immediately go to sleep in idle_loop. // Upon resizing, threads are recreated to allow for binding if necessary. -void ThreadPool::set(Search::SharedState sharedState, +void ThreadPool::set(const NumaConfig& numaConfig, + Search::SharedState sharedState, const Search::SearchManager::UpdateContext& updateContext) { if (threads.size() > 0) // destroy any existing thread(s) { main_thread()->wait_for_search_finished(); - while (threads.size() > 0) - delete threads.back(), threads.pop_back(); + threads.clear(); + + boundThreadToNumaNode.clear(); } const size_t requested = sharedState.options["Threads"]; if (requested > 0) // create new thread(s) { - auto manager = std::make_unique(updateContext); - threads.push_back(new Thread(sharedState, std::move(manager), 0)); + // Binding threads may be problematic when there's multiple NUMA nodes and + // multiple Stockfish instances running. In particular, if each instance + // runs a single thread then they would all be mapped to the first NUMA node. + // This is undesirable, and so the default behaviour (i.e. 
when the user does not + // change the NumaPolicy UCI setting) is to not bind the threads to processors + // unless we know for sure that we span NUMA nodes and replication is required. + const std::string numaPolicy(sharedState.options["NumaPolicy"]); + const bool doBindThreads = [&]() { + if (numaPolicy == "none") + return false; + + if (numaPolicy == "auto") + return numaConfig.suggests_binding_threads(requested); + + // numaPolicy == "system", or explicitly set by the user + return true; + }(); + + boundThreadToNumaNode = doBindThreads + ? numaConfig.distribute_threads_among_numa_nodes(requested) + : std::vector{}; while (threads.size() < requested) { - auto null_manager = std::make_unique(); - threads.push_back(new Thread(sharedState, std::move(null_manager), threads.size())); + const size_t threadId = threads.size(); + const NumaIndex numaId = doBindThreads ? boundThreadToNumaNode[threadId] : 0; + auto manager = threadId == 0 ? std::unique_ptr( + std::make_unique(updateContext)) + : std::make_unique(); + + // When not binding threads we want to force all access to happen + // from the same NUMA node, because in case of NUMA replicated memory + // accesses we don't want to thrash the cache in case the threads get scheduled + // on the same NUMA node. + auto binder = doBindThreads ? 
OptionalThreadToNumaNodeBinder(numaConfig, numaId) + : OptionalThreadToNumaNodeBinder(numaId); + + threads.emplace_back( + std::make_unique(sharedState, std::move(manager), threadId, binder)); } clear(); main_thread()->wait_for_search_finished(); - - // Reallocate the hash with the new threadpool size - sharedState.tt.resize(sharedState.options["Hash"], requested); } } // Sets threadPool data to initial values void ThreadPool::clear() { - - for (Thread* th : threads) - th->worker->clear(); - if (threads.size() == 0) return; + for (auto&& th : threads) + th->clear_worker(); + + for (auto&& th : threads) + th->wait_for_search_finished(); + main_manager()->callsCnt = 0; main_manager()->bestPreviousScore = VALUE_INFINITE; main_manager()->bestPreviousAverageScore = VALUE_INFINITE; @@ -172,6 +221,17 @@ void ThreadPool::clear() { main_manager()->tm.clear(); } +void ThreadPool::run_on_thread(size_t threadId, std::function f) { + assert(threads.size() > threadId); + threads[threadId]->run_custom_job(std::move(f)); +} + +void ThreadPool::wait_on_thread(size_t threadId) { + assert(threads.size() > threadId); + threads[threadId]->wait_for_search_finished(); +} + +size_t ThreadPool::num_threads() const { return threads.size(); } // Wakes up main thread waiting in idle_loop() and // returns immediately. Main thread will wake up other threads and start the search. @@ -216,31 +276,36 @@ void ThreadPool::start_thinking(const OptionsMap& options, // be deduced from a fen string, so set() clears them and they are set from // setupStates->back() later. The rootState is per thread, earlier states are shared // since they are read-only. 
- for (Thread* th : threads) + for (auto&& th : threads) { - th->worker->limits = limits; - th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly = - th->worker->bestMoveChanges = 0; - th->worker->rootDepth = th->worker->completedDepth = 0; - th->worker->rootMoves = rootMoves; - th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState); - th->worker->rootState = setupStates->back(); - th->worker->tbConfig = tbConfig; + th->run_custom_job([&]() { + th->worker->limits = limits; + th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly = + th->worker->bestMoveChanges = 0; + th->worker->rootDepth = th->worker->completedDepth = 0; + th->worker->rootMoves = rootMoves; + th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState); + th->worker->rootState = setupStates->back(); + th->worker->tbConfig = tbConfig; + }); } + for (auto&& th : threads) + th->wait_for_search_finished(); + main_thread()->start_searching(); } Thread* ThreadPool::get_best_thread() const { - Thread* bestThread = threads.front(); + Thread* bestThread = threads.front().get(); Value minScore = VALUE_NONE; std::unordered_map votes( 2 * std::min(size(), bestThread->worker->rootMoves.size())); // Find the minimum score of all threads - for (Thread* th : threads) + for (auto&& th : threads) minScore = std::min(minScore, th->worker->rootMoves[0].score); // Vote according to score and depth, and select the best thread @@ -248,10 +313,10 @@ Thread* ThreadPool::get_best_thread() const { return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth); }; - for (Thread* th : threads) - votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th); + for (auto&& th : threads) + votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get()); - for (Thread* th : threads) + for (auto&& th : threads) { const auto bestThreadScore = bestThread->worker->rootMoves[0].score; const auto newThreadScore = 
th->worker->rootMoves[0].score; @@ -272,26 +337,26 @@ Thread* ThreadPool::get_best_thread() const { // Note that we make sure not to pick a thread with truncated-PV for better viewer experience. const bool betterVotingValue = - thread_voting_value(th) * int(newThreadPV.size() > 2) + thread_voting_value(th.get()) * int(newThreadPV.size() > 2) > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2); if (bestThreadInProvenWin) { // Make sure we pick the shortest mate / TB conversion if (newThreadScore > bestThreadScore) - bestThread = th; + bestThread = th.get(); } else if (bestThreadInProvenLoss) { // Make sure we pick the shortest mated / TB conversion if (newThreadInProvenLoss && newThreadScore < bestThreadScore) - bestThread = th; + bestThread = th.get(); } else if (newThreadInProvenWin || newThreadInProvenLoss || (newThreadScore > VALUE_TB_LOSS_IN_MAX_PLY && (newThreadMoveVote > bestThreadMoveVote || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue)))) - bestThread = th; + bestThread = th.get(); } return bestThread; @@ -302,7 +367,7 @@ Thread* ThreadPool::get_best_thread() const { // Will be invoked by main thread after it has started searching void ThreadPool::start_searching() { - for (Thread* th : threads) + for (auto&& th : threads) if (th != threads.front()) th->start_searching(); } @@ -312,9 +377,28 @@ void ThreadPool::start_searching() { void ThreadPool::wait_for_search_finished() const { - for (Thread* th : threads) + for (auto&& th : threads) if (th != threads.front()) th->wait_for_search_finished(); } +std::vector ThreadPool::get_bound_thread_count_by_numa_node() const { + std::vector counts; + + if (!boundThreadToNumaNode.empty()) + { + NumaIndex highestNumaNode = 0; + for (NumaIndex n : boundThreadToNumaNode) + if (n > highestNumaNode) + highestNumaNode = n; + + counts.resize(highestNumaNode + 1, 0); + + for (NumaIndex n : boundThreadToNumaNode) + counts[n] += 1; + } + + return counts; +} + } // namespace Stockfish diff --git 
a/src/thread.h b/src/thread.h index 223652ae..102b2299 100644 --- a/src/thread.h +++ b/src/thread.h @@ -26,10 +26,12 @@ #include #include #include +#include #include "position.h" #include "search.h" #include "thread_win32_osx.h" +#include "numa.h" namespace Stockfish { @@ -37,6 +39,32 @@ namespace Stockfish { class OptionsMap; using Value = int; +// Sometimes we don't want to actually bind the threads, but the recipient still +// needs to think it runs on *some* NUMA node, such that it can access structures +// that rely on NUMA node knowledge. This class encapsulates this optional process +// such that the recipient does not need to know whether the binding happened or not. +class OptionalThreadToNumaNodeBinder { + public: + OptionalThreadToNumaNodeBinder(NumaIndex n) : + numaConfig(nullptr), + numaId(n) {} + + OptionalThreadToNumaNodeBinder(const NumaConfig& cfg, NumaIndex n) : + numaConfig(&cfg), + numaId(n) {} + + NumaReplicatedAccessToken operator()() const { + if (numaConfig != nullptr) + return numaConfig->bind_current_thread_to_numa_node(numaId); + else + return NumaReplicatedAccessToken(numaId); + } + + private: + const NumaConfig* numaConfig; + NumaIndex numaId; +}; + // Abstraction of a thread. It contains a pointer to the worker and a native thread. // After construction, the native thread is started with idle_loop() // waiting for a signal to start searching. @@ -44,22 +72,35 @@ using Value = int; // the search is finished, it goes back to idle_loop() waiting for a new signal. class Thread { public: - Thread(Search::SharedState&, std::unique_ptr, size_t); + Thread(Search::SharedState&, + std::unique_ptr, + size_t, + OptionalThreadToNumaNodeBinder); virtual ~Thread(); - void idle_loop(); - void start_searching(); + void idle_loop(); + void start_searching(); + void clear_worker(); + void run_custom_job(std::function f); + + // Thread has been slightly altered to allow running custom jobs, so + // this name is no longer correct. 
However, this class (and ThreadPool) + // require further work to make them properly generic while maintaining + // appropriate specificity regarding search, from the point of view of an + // outside user, so renaming of this function is left for whenever that happens. void wait_for_search_finished(); size_t id() const { return idx; } std::unique_ptr worker; + std::function jobFunc; private: - std::mutex mutex; - std::condition_variable cv; - size_t idx, nthreads; - bool exit = false, searching = true; // Set before starting std::thread - NativeThread stdThread; + std::mutex mutex; + std::condition_variable cv; + size_t idx, nthreads; + bool exit = false, searching = true; // Set before starting std::thread + NativeThread stdThread; + NumaReplicatedAccessToken numaAccessToken; }; @@ -67,31 +108,44 @@ class Thread { // parking and, most importantly, launching a thread. All the access to threads // is done through this class. class ThreadPool { - public: + ThreadPool() {} + ~ThreadPool() { // destroy any existing thread(s) if (threads.size() > 0) { main_thread()->wait_for_search_finished(); - while (threads.size() > 0) - delete threads.back(), threads.pop_back(); + threads.clear(); } } - void start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType); - void clear(); - void set(Search::SharedState, const Search::SearchManager::UpdateContext&); + ThreadPool(const ThreadPool&) = delete; + ThreadPool(ThreadPool&&) = delete; + + ThreadPool& operator=(const ThreadPool&) = delete; + ThreadPool& operator=(ThreadPool&&) = delete; + + void start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType); + void run_on_thread(size_t threadId, std::function f); + void wait_on_thread(size_t threadId); + size_t num_threads() const; + void clear(); + void set(const NumaConfig& numaConfig, + Search::SharedState, + const Search::SearchManager::UpdateContext&); Search::SearchManager* main_manager(); - Thread* main_thread() const { return 
threads.front(); } + Thread* main_thread() const { return threads.front().get(); } uint64_t nodes_searched() const; uint64_t tb_hits() const; Thread* get_best_thread() const; void start_searching(); void wait_for_search_finished() const; + std::vector get_bound_thread_count_by_numa_node() const; + std::atomic_bool stop, abortedSearch, increaseDepth; auto cbegin() const noexcept { return threads.cbegin(); } @@ -102,13 +156,14 @@ class ThreadPool { auto empty() const noexcept { return threads.empty(); } private: - StateListPtr setupStates; - std::vector threads; + StateListPtr setupStates; + std::vector> threads; + std::vector boundThreadToNumaNode; uint64_t accumulate(std::atomic Search::Worker::*member) const { uint64_t sum = 0; - for (Thread* th : threads) + for (auto&& th : threads) sum += (th->worker.get()->*member).load(std::memory_order_relaxed); return sum; } diff --git a/src/tt.cpp b/src/tt.cpp index 3f5b9d4d..79274f52 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -23,10 +23,10 @@ #include #include #include -#include -#include #include "misc.h" +#include "syzygy/tbprobe.h" +#include "thread.h" namespace Stockfish { @@ -74,7 +74,7 @@ uint8_t TTEntry::relative_age(const uint8_t generation8) const { // Sets the size of the transposition table, // measured in megabytes. Transposition table consists // of clusters and each cluster consists of ClusterSize number of TTEntry. -void TranspositionTable::resize(size_t mbSize, int threadCount) { +void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) { aligned_large_pages_free(table); clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); @@ -86,32 +86,29 @@ void TranspositionTable::resize(size_t mbSize, int threadCount) { exit(EXIT_FAILURE); } - clear(threadCount); + clear(threads); } // Initializes the entire transposition table to zero, // in a multi-threaded way. 
-void TranspositionTable::clear(size_t threadCount) { - std::vector threads; +void TranspositionTable::clear(ThreadPool& threads) { + const size_t threadCount = threads.num_threads(); - for (size_t idx = 0; idx < size_t(threadCount); ++idx) + for (size_t i = 0; i < threadCount; ++i) { - threads.emplace_back([this, idx, threadCount]() { - // Thread binding gives faster search on systems with a first-touch policy - if (threadCount > 8) - WinProcGroup::bind_this_thread(idx); - + threads.run_on_thread(i, [this, i, threadCount]() { // Each thread will zero its part of the hash table - const size_t stride = size_t(clusterCount / threadCount), start = size_t(stride * idx), - len = idx != size_t(threadCount) - 1 ? stride : clusterCount - start; + const size_t stride = clusterCount / threadCount; + const size_t start = stride * i; + const size_t len = i + 1 != threadCount ? stride : clusterCount - start; std::memset(&table[start], 0, len * sizeof(Cluster)); }); } - for (std::thread& th : threads) - th.join(); + for (size_t i = 0; i < threadCount; ++i) + threads.wait_on_thread(i); } diff --git a/src/tt.h b/src/tt.h index 7cc876fb..3b09ec4e 100644 --- a/src/tt.h +++ b/src/tt.h @@ -63,6 +63,7 @@ struct TTEntry { int16_t eval16; }; +class ThreadPool; // A TranspositionTable is an array of Cluster, of size clusterCount. Each // cluster consists of ClusterSize number of TTEntry. 
Each non-empty TTEntry @@ -102,8 +103,8 @@ class TranspositionTable { TTEntry* probe(const Key key, bool& found) const; int hashfull() const; - void resize(size_t mbSize, int threadCount); - void clear(size_t threadCount); + void resize(size_t mbSize, ThreadPool& threads); + void clear(ThreadPool& threads); TTEntry* first_entry(const Key key) const { return &table[mul_hi64(key, clusterCount)].entry[0]; diff --git a/src/uci.cpp b/src/uci.cpp index cb686a02..ab0dae39 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -60,7 +60,16 @@ UCIEngine::UCIEngine(int argc, char** argv) : options["Debug Log File"] << Option("", [](const Option& o) { start_logger(o); }); - options["Threads"] << Option(1, 1, 1024, [this](const Option&) { engine.resize_threads(); }); + options["NumaPolicy"] << Option("auto", [this](const Option& o) { + engine.set_numa_config_from_option(o); + print_numa_config_information(); + print_thread_binding_information(); + }); + + options["Threads"] << Option(1, 1, 1024, [this](const Option&) { + engine.resize_threads(); + print_thread_binding_information(); + }); options["Hash"] << Option(16, 1, MaxHashMB, [this](const Option& o) { engine.set_tt_size(o); }); @@ -123,8 +132,15 @@ void UCIEngine::loop() { engine.set_ponderhit(false); else if (token == "uci") + { sync_cout << "id name " << engine_info(true) << "\n" - << engine.get_options() << "\nuciok" << sync_endl; + << engine.get_options() << sync_endl; + + print_numa_config_information(); + print_thread_binding_information(); + + sync_cout << "uciok" << sync_endl; + } else if (token == "setoption") setoption(is); @@ -177,6 +193,28 @@ void UCIEngine::loop() { } while (token != "quit" && cli.argc == 1); // The command-line arguments are one-shot } +void UCIEngine::print_numa_config_information() const { + auto cfgStr = engine.get_numa_config_as_string(); + sync_cout << "info string Available Processors: " << cfgStr << sync_endl; +} + +void UCIEngine::print_thread_binding_information() const { + auto 
boundThreadsByNode = engine.get_bound_thread_count_by_numa_node(); + if (!boundThreadsByNode.empty()) + { + sync_cout << "info string NUMA Node Thread Binding: "; + bool isFirst = true; + for (auto&& [current, total] : boundThreadsByNode) + { + if (!isFirst) + std::cout << ":"; + std::cout << current << "/" << total; + isFirst = false; + } + std::cout << sync_endl; + } +} + Search::LimitsType UCIEngine::parse_limits(std::istream& is) { Search::LimitsType limits; std::string token; diff --git a/src/uci.h b/src/uci.h index 55d580f9..bac62bb9 100644 --- a/src/uci.h +++ b/src/uci.h @@ -42,6 +42,9 @@ class UCIEngine { void loop(); + void print_numa_config_information() const; + void print_thread_binding_information() const; + static int to_cp(Value v, const Position& pos); static std::string format_score(const Score& s); static std::string square(Square s); diff --git a/src/ucioption.cpp b/src/ucioption.cpp index e1ffe546..4819a68d 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -118,6 +118,8 @@ bool Option::operator==(const char* s) const { return !CaseInsensitiveLess()(currentValue, s) && !CaseInsensitiveLess()(s, currentValue); } +bool Option::operator!=(const char* s) const { return !(*this == s); } + // Inits options and assigns idx in the correct printing order diff --git a/src/ucioption.h b/src/ucioption.h index b575d164..16d46696 100644 --- a/src/ucioption.h +++ b/src/ucioption.h @@ -67,6 +67,7 @@ class Option { operator int() const; operator std::string() const; bool operator==(const char*) const; + bool operator!=(const char*) const; friend std::ostream& operator<<(std::ostream&, const OptionsMap&); From 41acbcae1a8af4b23be397f7fe7234f3bc49a26e Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Wed, 29 May 2024 16:14:24 +0300 Subject: [PATCH 065/315] Simplifying malus for putting piece en prise formula Patch author: @ehsanrashid Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 116192 W: 30229 L: 30094 D: 55869 Ptnml(0-2): 451, 13880, 29351, 13911, 
503 https://tests.stockfishchess.org/tests/view/66510a40a86388d5e27da936 Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 441312 W: 111009 L: 111220 D: 219083 Ptnml(0-2): 217, 49390, 121659, 49167, 223 https://tests.stockfishchess.org/tests/view/66530696a86388d5e27da9e3 closes https://github.com/official-stockfish/Stockfish/pull/5304 Bench: 1987574 --- AUTHORS | 1 + src/movepick.cpp | 12 +++++------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/AUTHORS b/AUTHORS index 36b2b6f7..a232e115 100644 --- a/AUTHORS +++ b/AUTHORS @@ -68,6 +68,7 @@ Douglas Matos Gomes (dsmsgms) Dubslow Eduardo Cáceres (eduherminio) Eelco de Groot (KingDefender) +Ehsan Rashid (erashid) Elvin Liu (solarlight2) erbsenzaehler Ernesto Gatti diff --git a/src/movepick.cpp b/src/movepick.cpp index 7def0ce8..55f9ca0e 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -197,13 +197,11 @@ void MovePicker::score() { : 0; // malus for putting piece en prise - m.value -= !(threatenedPieces & from) - ? (pt == QUEEN ? bool(to & threatenedByRook) * 48150 - + bool(to & threatenedByMinor) * 10650 - : pt == ROOK ? bool(to & threatenedByMinor) * 24335 - : pt != PAWN ? bool(to & threatenedByPawn) * 14950 - : 0) - : 0; + m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 48150 + + bool(to & threatenedByMinor) * 10650 + : pt == ROOK ? bool(to & threatenedByMinor) * 24335 + : pt != PAWN ? bool(to & threatenedByPawn) * 14950 + : 0); } else // Type == EVASIONS From c7b80f6c8a7b8267e019fc4ecb496f14f5256f3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Mon, 27 May 2024 04:32:04 +0200 Subject: [PATCH 066/315] Merge pawn count terms using their average This simplification patch merges the pawn count terms in the eval formula with the material term, updating the offset constant for the nnue part of the formula from 34000 to 34300 because the average pawn count in middlegame positions evaluated during search is around 8. 
STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 138240 W: 35834 L: 35723 D: 66683 Ptnml(0-2): 527, 16587, 34817, 16626, 563 https://tests.stockfishchess.org/tests/view/6653f474a86388d5e27daaac LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 454272 W: 114787 L: 115012 D: 224473 Ptnml(0-2): 246, 51168, 124553, 50903, 266 https://tests.stockfishchess.org/tests/view/6654f256a86388d5e27db131 closes https://github.com/official-stockfish/Stockfish/pull/5303 Bench: 1279635 --- src/evaluate.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 13a3f211..849b7bb6 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -77,12 +77,10 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, optimism += optimism * nnueComplexity / 470; nnue -= nnue * (nnueComplexity * 5 / 3) / 32621; - int material = 200 * pos.count() + 350 * pos.count() + 400 * pos.count() + int material = 300 * pos.count() + 350 * pos.count() + 400 * pos.count() + 640 * pos.count() + 1200 * pos.count(); - v = (nnue * (34000 + material + 135 * pos.count()) - + optimism * (4400 + material + 99 * pos.count())) - / 35967; + v = (nnue * (34300 + material) + optimism * (4400 + material)) / 35967; // Damp down the evaluation linearly when shuffling v = v * (204 - pos.rule50_count()) / 208; From c14297a483f7905d61e6f22068d33b199916257a Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Mon, 27 May 2024 01:21:32 -0700 Subject: [PATCH 067/315] Tune Fail Low Bonus Fractional bonus idea is from @Ergodice on [discord](https://discord.com/channels/435943710472011776/735707599353151579/1244039134499180614). Values are tuned for 149k games at LTC. 
SPSA tune: https://tests.stockfishchess.org/tests/view/6652d5d5a86388d5e27da9d6 Failed STC: LLR: -2.95 (-2.94,2.94) <0.00,2.00> Total: 67424 W: 17364 L: 17528 D: 32532 Ptnml(0-2): 238, 8043, 17299, 7909, 223 https://tests.stockfishchess.org/tests/view/66551e1ba86388d5e27db9f9 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 146910 W: 37141 L: 36695 D: 73074 Ptnml(0-2): 84, 16201, 40441, 16643, 86 https://tests.stockfishchess.org/tests/view/66559949a86388d5e27dcc5d Passed VLTC: LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 27248 W: 6924 L: 6633 D: 13691 Ptnml(0-2): 5, 2744, 7835, 3035, 5 https://tests.stockfishchess.org/tests/view/66563f4da86388d5e27dd27a closes https://github.com/official-stockfish/Stockfish/pull/5299 Bench: 1390709 --- src/search.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c074e342..425782eb 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1341,18 +1341,19 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (depth > 4) + (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -14144) - + ((ss - 1)->moveCount > 9) + (!ss->inCheck && bestValue <= ss->staticEval - 115) - + (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 81); + int bonus = (54 * (depth > 4) + 62 * (depth > 5) + 115 * (PvNode || cutNode) + + 186 * ((ss - 1)->statScore < -14144) + 121 * ((ss - 1)->moveCount > 9) + + 64 * (!ss->inCheck && bestValue <= ss->staticEval - 115) + + 137 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 81)); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, - stat_bonus(depth) * bonus); + stat_bonus(depth) * bonus / 100); thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] - << stat_bonus(depth) * bonus / 2; + << stat_bonus(depth) * bonus / 200; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 
1)->currentMove).type_of() != PROMOTION) thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] - << stat_bonus(depth) * bonus * 4; + << stat_bonus(depth) * bonus / 25; } if (PvNode) From a2f4e988aa03a1011b671af07a152682e35b4617 Mon Sep 17 00:00:00 2001 From: mstembera Date: Tue, 28 May 2024 13:32:09 -0700 Subject: [PATCH 068/315] Fix MSVC NUMA compile issues closes https://github.com/official-stockfish/Stockfish/pull/5298 No functional change --- src/misc.h | 4 ++-- src/numa.h | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/misc.h b/src/misc.h index 99cbecfd..ec7f7b76 100644 --- a/src/misc.h +++ b/src/misc.h @@ -77,6 +77,8 @@ using AlignedPtr = std::unique_ptr>; template using LargePagePtr = std::unique_ptr>; +#if defined(__linux__) + struct PipeDeleter { void operator()(FILE* file) const { if (file != nullptr) @@ -86,8 +88,6 @@ struct PipeDeleter { } }; -#if defined(__linux__) - inline std::optional get_system_command_output(const std::string& command) { std::unique_ptr pipe(popen(command.c_str(), "r")); if (!pipe) diff --git a/src/numa.h b/src/numa.h index c04292da..03ee1fdf 100644 --- a/src/numa.h +++ b/src/numa.h @@ -51,6 +51,9 @@ static constexpr size_t WIN_PROCESSOR_GROUP_SIZE = 64; #define NOMINMAX #endif #include + #if defined small + #undef small + #endif // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadselectedcpusetmasks using SetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT); @@ -561,8 +564,8 @@ class NumaConfig { if (SetThreadSelectedCpuSetMasks_f != nullptr) { // Only available on Windows 11 and Windows Server 2022 onwards. 
- const USHORT numProcGroups = - ((highestCpuIndex + 1) + WIN_PROCESSOR_GROUP_SIZE - 1) / WIN_PROCESSOR_GROUP_SIZE; + const USHORT numProcGroups = USHORT( + ((highestCpuIndex + 1) + WIN_PROCESSOR_GROUP_SIZE - 1) / WIN_PROCESSOR_GROUP_SIZE); auto groupAffinities = std::make_unique(numProcGroups); std::memset(groupAffinities.get(), 0, sizeof(GROUP_AFFINITY) * numProcGroups); for (WORD i = 0; i < numProcGroups; ++i) From ae7eef51fde6d74f1a10269dec36bf6d80855a0a Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Tue, 28 May 2024 14:31:56 -0700 Subject: [PATCH 069/315] Simplify Fail Low Bonus Formula Tested against PR #5299 Passed Non-regression STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 76352 W: 19797 L: 19619 D: 36936 Ptnml(0-2): 236, 9017, 19509, 9161, 253 https://tests.stockfishchess.org/tests/view/66564f60a86388d5e27dd307 Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 114624 W: 28946 L: 28821 D: 56857 Ptnml(0-2): 59, 12675, 31714, 12810, 54 https://tests.stockfishchess.org/tests/view/6656543da86388d5e27dd329 closes https://github.com/official-stockfish/Stockfish/pull/5301 Bench: 1212167 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 425782eb..5e9f6476 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1341,7 +1341,7 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (54 * (depth > 4) + 62 * (depth > 5) + 115 * (PvNode || cutNode) + int bonus = (116 * (depth > 5) + 115 * (PvNode || cutNode) + 186 * ((ss - 1)->statScore < -14144) + 121 * ((ss - 1)->moveCount > 9) + 64 * (!ss->inCheck && bestValue <= ss->staticEval - 115) + 137 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 81)); From 3c62ad7e077a5ed0ea7b55422e03e7316dcbce7e Mon Sep 17 00:00:00 2001 From: xoto10 <23479932+xoto10@users.noreply.github.com> Date: Tue, 28 May 2024 19:40:40 
+0100 Subject: [PATCH 070/315] Add compensation factor to adjust extra time according to time control As stockfish nets and search evolve, the existing time control appears to give too little time at STC, roughly correct at LTC, and too little at VLTC+. This change adds an adjustment to the optExtra calculation. This adjustment is easy to retune and refine, so it should be easier to keep up-to-date than the more complex calculations used for optConstant and optScale. Passed STC 10+0.1: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 169568 W: 43803 L: 43295 D: 82470 Ptnml(0-2): 485, 19679, 44055, 19973, 592 https://tests.stockfishchess.org/tests/view/66531865a86388d5e27da9fa Yellow LTC 60+0.6: LLR: -2.94 (-2.94,2.94) <0.50,2.50> Total: 209970 W: 53087 L: 52914 D: 103969 Ptnml(0-2): 91, 19652, 65314, 19849, 79 https://tests.stockfishchess.org/tests/view/6653e38ba86388d5e27daaa0 Passed VLTC 180+1.8 : LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 85618 W: 21735 L: 21342 D: 42541 Ptnml(0-2): 15, 8267, 25848, 8668, 11 https://tests.stockfishchess.org/tests/view/6655131da86388d5e27db95f closes https://github.com/official-stockfish/Stockfish/pull/5297 Bench: 1212167 --- src/search.cpp | 2 +- src/search.h | 1 + src/thread.cpp | 1 + src/timeman.cpp | 14 ++++++++++++-- src/timeman.h | 8 ++++++-- 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5e9f6476..ec4ae79d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -161,7 +161,7 @@ void Search::Worker::start_searching() { } main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options, - main_manager()->originalPly); + main_manager()->originalPly, main_manager()->originalTimeAdjust); tt.new_search(); if (rootMoves.empty()) diff --git a/src/search.h b/src/search.h index a61f253c..7cff10d5 100644 --- a/src/search.h +++ b/src/search.h @@ -208,6 +208,7 @@ class SearchManager: public ISearchManager { Depth depth) const; Stockfish::TimeManagement tm; + double 
originalTimeAdjust; int originalPly; int callsCnt; std::atomic_bool ponder; diff --git a/src/thread.cpp b/src/thread.cpp index 5893f4b6..71134ead 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -217,6 +217,7 @@ void ThreadPool::clear() { main_manager()->bestPreviousScore = VALUE_INFINITE; main_manager()->bestPreviousAverageScore = VALUE_INFINITE; main_manager()->originalPly = -1; + main_manager()->originalTimeAdjust = -1; main_manager()->previousTimeReduction = 1.0; main_manager()->tm.clear(); } diff --git a/src/timeman.cpp b/src/timeman.cpp index f389e082..f6ca298a 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -44,8 +44,12 @@ void TimeManagement::advance_nodes_time(std::int64_t nodes) { // the bounds of time allowed for the current game ply. We currently support: // 1) x basetime (+ z increment) // 2) x moves in y seconds (+ z increment) -void TimeManagement::init( - Search::LimitsType& limits, Color us, int ply, const OptionsMap& options, int& originalPly) { +void TimeManagement::init(Search::LimitsType& limits, + Color us, + int ply, + const OptionsMap& options, + int& originalPly, + double& originalTimeAdjust) { TimePoint npmsec = TimePoint(options["nodestime"]); // If we have no time, we don't need to fully initialize TM. @@ -100,6 +104,10 @@ void TimeManagement::init( TimePoint timeLeft = std::max(TimePoint(1), limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg)); + // Extra time according to timeLeft + if (originalTimeAdjust < 0) + originalTimeAdjust = 0.2078 + 0.1623 * std::log10(timeLeft); + // x basetime (+ z increment) // If there is a healthy increment, timeLeft can exceed the actual available // game time for the current move, so also cap to a percentage of available game time. @@ -109,6 +117,7 @@ void TimeManagement::init( double optExtra = scaledInc < 500 ? 1.0 : 1.13; if (ply - originalPly < 2) optExtra *= 0.95; + optExtra *= originalTimeAdjust; // Calculate time constants based on current time left. 
double logTimeInSec = std::log10(scaledTime / 1000.0); @@ -118,6 +127,7 @@ void TimeManagement::init( optScale = std::min(0.0122 + std::pow(ply + 2.95, 0.462) * optConstant, 0.213 * limits.time[us] / timeLeft) * optExtra; + maxScale = std::min(6.64, maxConstant + ply / 12.0); } diff --git a/src/timeman.h b/src/timeman.h index 8f1bb563..8b763089 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -36,8 +36,12 @@ struct LimitsType; // the maximum available time, the game move number, and other parameters. class TimeManagement { public: - void init( - Search::LimitsType& limits, Color us, int ply, const OptionsMap& options, int& originalPly); + void init(Search::LimitsType& limits, + Color us, + int ply, + const OptionsMap& options, + int& originalPly, + double& originalTimeAdjust); TimePoint optimum() const; TimePoint maximum() const; From 4a2291ed337730e5093af1532d36acf1f066989b Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Thu, 23 May 2024 19:22:41 -0700 Subject: [PATCH 071/315] Simplify Away Quadruple Extension Passed non-regression VVLTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 90792 W: 23155 L: 23018 D: 44619 Ptnml(0-2): 6, 8406, 28432, 8549, 3 https://tests.stockfishchess.org/tests/view/664ffa4ca86388d5e27d8e7a Passed non-regression VLTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 288136 W: 72608 L: 72659 D: 142869 Ptnml(0-2): 38, 30258, 83525, 30211, 36 https://tests.stockfishchess.org/tests/view/66551609a86388d5e27db9ae closes https://github.com/official-stockfish/Stockfish/pull/5293 bench 1501735 --- src/search.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index ec4ae79d..22e82be8 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1066,11 +1066,9 @@ moves_loop: // When in check, search starts here { int doubleMargin = 304 * PvNode - 203 * !ttCapture; int tripleMargin = 117 + 259 * PvNode - 296 * !ttCapture + 97 * ss->ttPv; - int quadMargin = 486 + 343 * PvNode - 273 * !ttCapture + 232 * ss->ttPv; 
extension = 1 + (value < singularBeta - doubleMargin) - + (value < singularBeta - tripleMargin) - + (value < singularBeta - quadMargin); + + (value < singularBeta - tripleMargin); depth += ((!PvNode) && (depth < 16)); } From 5ab3fe6db8ea7dff1310c792d66f2a906a5c19c5 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Tue, 28 May 2024 19:39:55 -0400 Subject: [PATCH 072/315] Simplify blending eval with nnue complexity Passed non-regression STC: https://tests.stockfishchess.org/tests/view/66567377a86388d5e27dd89c LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 144000 W: 37443 L: 37338 D: 69219 Ptnml(0-2): 587, 17260, 36208, 17351, 594 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/66567f29a86388d5e27dd924 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 112326 W: 28550 L: 28421 D: 55355 Ptnml(0-2): 66, 12732, 30434, 12869, 62 closes https://github.com/official-stockfish/Stockfish/pull/5305 bench 1554486 --- src/evaluate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 849b7bb6..666697dd 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -75,7 +75,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, // Blend optimism and eval with nnue complexity optimism += optimism * nnueComplexity / 470; - nnue -= nnue * (nnueComplexity * 5 / 3) / 32621; + nnue -= nnue * nnueComplexity / 20000; int material = 300 * pos.count() + 350 * pos.count() + 400 * pos.count() + 640 * pos.count() + 1200 * pos.count(); From 0ea6337ccfffa39b665e3a8371fcde668dddf4aa Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Thu, 30 May 2024 03:36:38 +0300 Subject: [PATCH 073/315] Remove Queen threatenedByMinor Remove Queen threatenedByMinor from movepick Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 54432 W: 14053 L: 13855 D: 26524 Ptnml(0-2): 124, 6347, 14090, 6517, 138 https://tests.stockfishchess.org/tests/view/66578d036b0e318cefa8d43d Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 198168 W: 49979 L: 
49940 D: 98249 Ptnml(0-2): 84, 21824, 55236, 21849, 91 https://tests.stockfishchess.org/tests/view/66579cf86b0e318cefa8d5b1 closes https://github.com/official-stockfish/Stockfish/pull/5306 bench: 1342438 --- src/movepick.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 55f9ca0e..6c41916c 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -197,8 +197,7 @@ void MovePicker::score() { : 0; // malus for putting piece en prise - m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 48150 - + bool(to & threatenedByMinor) * 10650 + m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 49000 : pt == ROOK ? bool(to & threatenedByMinor) * 24335 : pt != PAWN ? bool(to & threatenedByPawn) * 14950 : 0); From 35aff79843658aef55426d5d88be412f54d936b8 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Wed, 29 May 2024 21:18:55 -0400 Subject: [PATCH 074/315] Update default main net to nn-ddcfb9224cdb.nnue Created by further tuning the spsa-tuned main net `nn-c721dfca8cd3.nnue` with the same methods described in https://github.com/official-stockfish/Stockfish/pull/5254 This net was reached at 61k / 120k spsa games at 70+0.7 th 7: https://tests.stockfishchess.org/tests/view/665639d0a86388d5e27dd259 Passed STC: https://tests.stockfishchess.org/tests/view/6657d44e6b0e318cefa8d771 LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 114688 W: 29775 L: 29344 D: 55569 Ptnml(0-2): 274, 13633, 29149, 13964, 324 Passed LTC: https://tests.stockfishchess.org/tests/view/6657e1e46b0e318cefa8d7a6 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 88152 W: 22412 L: 21988 D: 43752 Ptnml(0-2): 56, 9560, 24409, 10006, 45 closes https://github.com/official-stockfish/Stockfish/pull/5308 Bench: 1434678 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index 4b3e91ac..4fab1a00 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -33,7 +33,7 @@ namespace Eval { // for the build process 
(profile-build and fishtest) to work. Do not change the // name of the macro or the location where this macro is defined, as it is used // in the Makefile/Fishtest. -#define EvalFileDefaultNameBig "nn-c721dfca8cd3.nnue" +#define EvalFileDefaultNameBig "nn-ddcfb9224cdb.nnue" #define EvalFileDefaultNameSmall "nn-baff1ede1f90.nnue" namespace NNUE { From a4ea183e7839f62665e706c13b508ccce86d5fd6 Mon Sep 17 00:00:00 2001 From: "Robert Nurnberg @ elitebook" Date: Thu, 30 May 2024 09:05:36 +0200 Subject: [PATCH 075/315] Tweak and update the WDL model This PR updates the internal WDL model, using data from 2.5M games played by SF-dev (3c62ad7). Note that the normalizing constant has increased from 329 to 368. Changes to the fitting procedure: * the value for --materialMin was increased from 10 to 17: including data with less material leads to less accuracy for larger material count values * the data was filtered to only include single thread LTC games at 60+0.6 * the data was filtered to only include games from master against patches that are (approximatively) within 5 nElo of master For more information and plots of the model see PR#5309 closes https://github.com/official-stockfish/Stockfish/pull/5309 No functional change --- src/uci.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/uci.cpp b/src/uci.cpp index ab0dae39..4b683116 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -382,12 +382,12 @@ WinRateParams win_rate_params(const Position& pos) { int material = pos.count() + 3 * pos.count() + 3 * pos.count() + 5 * pos.count() + 9 * pos.count(); - // The fitted model only uses data for material counts in [10, 78], and is anchored at count 58. - double m = std::clamp(material, 10, 78) / 58.0; + // The fitted model only uses data for material counts in [17, 78], and is anchored at count 58. 
+ double m = std::clamp(material, 17, 78) / 58.0; // Return a = p_a(material) and b = p_b(material), see github.com/official-stockfish/WDL_model - constexpr double as[] = {-150.77043883, 394.96159472, -321.73403766, 406.15850091}; - constexpr double bs[] = {62.33245393, -91.02264855, 45.88486850, 51.63461272}; + constexpr double as[] = {-41.25712052, 121.47473115, -124.46958843, 411.84490997}; + constexpr double bs[] = {84.92998051, -143.66658718, 80.09988253, 49.80869370}; double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; @@ -428,8 +428,8 @@ std::string UCIEngine::format_score(const Score& s) { // without treatment of mate and similar special scores. int UCIEngine::to_cp(Value v, const Position& pos) { - // In general, the score can be defined via the the WDL as - // (log(1/L - 1) - log(1/W - 1)) / ((log(1/L - 1) + log(1/W - 1)) + // In general, the score can be defined via the WDL as + // (log(1/L - 1) - log(1/W - 1)) / (log(1/L - 1) + log(1/W - 1)). // Based on our win_rate_model, this simply yields v / a. auto [a, b] = win_rate_params(pos); From a77a895c3b7460f86b11a3ddfe3528f5be1276b9 Mon Sep 17 00:00:00 2001 From: Viren6 <94880762+Viren6@users.noreply.github.com> Date: Thu, 30 May 2024 08:18:04 +0100 Subject: [PATCH 076/315] Add extension condition to cutoffCnt Decrease cutoffCnt increment by 1 if extension is 2 or greater. 
Passed STC: https://tests.stockfishchess.org/tests/view/66577a696b0e318cefa8d34d LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 99200 W: 25703 L: 25297 D: 48200 Ptnml(0-2): 253, 11660, 25390, 12022, 275 Passed LTC: https://tests.stockfishchess.org/tests/view/665787ab6b0e318cefa8d411 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 124530 W: 31659 L: 31161 D: 61710 Ptnml(0-2): 58, 13578, 34489, 14088, 52 closes https://github.com/official-stockfish/Stockfish/pull/5310 bench 1623228 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 22e82be8..d72dbfa1 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1289,7 +1289,7 @@ moves_loop: // When in check, search starts here if (value >= beta) { - ss->cutoffCnt += 1 + !ttMove; + ss->cutoffCnt += 1 + !ttMove - (extension >= 2); assert(value >= beta); // Fail high break; } From d1a71fdaa7cc7d749495bbf5d63919a4a0b42303 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sat, 18 May 2024 17:54:13 +0300 Subject: [PATCH 077/315] Functional simplification in the transposition table Passed STC: LLR: 2.98 (-2.94,2.94) <-1.75,0.25> Total: 154848 W: 39838 L: 39750 D: 75260 Ptnml(0-2): 404, 16214, 44087, 16328, 391 https://tests.stockfishchess.org/tests/view/664892b088b8c6a2bbe430fc Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 68172 W: 17296 L: 17137 D: 33739 Ptnml(0-2): 23, 6349, 21185, 6504, 25 https://tests.stockfishchess.org/tests/view/6648aabfa0781149e383e526 closes https://github.com/official-stockfish/Stockfish/pull/5263 Bench: 1623228 --- src/tt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tt.cpp b/src/tt.cpp index 79274f52..f95170e9 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -124,7 +124,7 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const { const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster for (int i = 0; i < ClusterSize; ++i) - if (tte[i].key16 == key16 || !tte[i].depth8) + 
if (tte[i].key16 == key16) return found = bool(tte[i].depth8), &tte[i]; // Find an entry to be replaced according to the replacement strategy From b280d2f06553e8c8d98379fe547f3b995cc56d59 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Thu, 30 May 2024 19:27:12 +0300 Subject: [PATCH 078/315] Allow tt cutoffs for shallower depths in certain conditions Current master allows tt cutoffs only when depth from tt is strictly greater than current node depth. This patch also allows them when it's equal and if tt value is lower or equal to beta. Passed STC: https://tests.stockfishchess.org/tests/view/66578e2e6b0e318cefa8d447 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 26592 W: 6944 L: 6645 D: 13003 Ptnml(0-2): 67, 3039, 6795, 3318, 77 Passed LTC: https://tests.stockfishchess.org/tests/view/6657f46b6b0e318cefa8d7e9 LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 142572 W: 36315 L: 35776 D: 70481 Ptnml(0-2): 70, 15666, 39288, 16179, 83 closes https://github.com/official-stockfish/Stockfish/pull/5314 Bench: 1368486 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index d72dbfa1..638af546 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -618,7 +618,7 @@ Value Search::Worker::search( ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); // At non-PV nodes we check for an early TT cutoff - if (!PvNode && !excludedMove && tte->depth() > depth + if (!PvNode && !excludedMove && tte->depth() > depth - (ttValue <= beta) && ttValue != VALUE_NONE // Possible in case of TT access race or if !ttHit && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) { From 02eae528330347b4c91f3d8fa4de7fc8629a5ac0 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Thu, 30 May 2024 20:44:21 +0300 Subject: [PATCH 079/315] Simplifying the malus for putting piece en prise formula Simplifying the malus for putting piece en prise formula by merging the minor pieces and pawns (removing the pawn exclusion from the formula). 
Passed STC: https://tests.stockfishchess.org/tests/view/66578d9c6b0e318cefa8d441 LLR: 2.99 (-2.94,2.94) <-1.75,0.25> Total: 314272 W: 80705 L: 80786 D: 152781 Ptnml(0-2): 873, 37577, 80366, 37398, 922 Passed LTC (before rebasing): https://tests.stockfishchess.org/tests/view/6657b5ee6b0e318cefa8d6ab LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 117000 W: 29447 L: 29324 D: 58229 Ptnml(0-2): 47, 12877, 32535, 12988, 53 Passed LTC (also after rebasing): https://tests.stockfishchess.org/tests/view/6658803d6b0e318cefa8fd99 LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 244992 W: 61807 L: 61814 D: 121371 Ptnml(0-2): 125, 27420, 67414, 27411, 126 closes https://github.com/official-stockfish/Stockfish/pull/5316 Bench: 1484840 --- src/movepick.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 6c41916c..b6828a30 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -199,8 +199,7 @@ void MovePicker::score() { // malus for putting piece en prise m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 49000 : pt == ROOK ? bool(to & threatenedByMinor) * 24335 - : pt != PAWN ? bool(to & threatenedByPawn) * 14950 - : 0); + : bool(to & threatenedByPawn) * 14900); } else // Type == EVASIONS From 596fb4842bdbb872dae8023a930f1dda8b48cad1 Mon Sep 17 00:00:00 2001 From: Disservin Date: Thu, 30 May 2024 19:55:59 +0200 Subject: [PATCH 080/315] NUMA: Fix concurrency counting for windows systems If there is more than 1 processor group, std::thread::hardware_concurrency should not be used. fixes #5307 closes https://github.com/official-stockfish/Stockfish/pull/5311 No functional change --- src/misc.cpp | 41 ++++++++++++++++++----------------------- src/numa.h | 25 +++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index d48b75e1..a45becf5 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -34,16 +34,10 @@ // the calls at compile time), try to load them at runtime. 
To do this we need // first to define the corresponding function pointers. extern "C" { -using fun1_t = bool (*)(LOGICAL_PROCESSOR_RELATIONSHIP, - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, - PDWORD); -using fun2_t = bool (*)(USHORT, PGROUP_AFFINITY); -using fun3_t = bool (*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); -using fun4_t = bool (*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); -using fun5_t = WORD (*)(); -using fun6_t = bool (*)(HANDLE, DWORD, PHANDLE); -using fun7_t = bool (*)(LPCSTR, LPCSTR, PLUID); -using fun8_t = bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); +using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE); +using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID); +using AdjustTokenPrivileges_t = + bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); } #endif @@ -488,23 +482,25 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize if (!hAdvapi32) hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); - auto fun6 = fun6_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken")); - if (!fun6) + auto OpenProcessToken_f = + OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken")); + if (!OpenProcessToken_f) return nullptr; - auto fun7 = fun7_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA")); - if (!fun7) + auto LookupPrivilegeValueA_f = + LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA")); + if (!LookupPrivilegeValueA_f) return nullptr; - auto fun8 = fun8_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges")); - if (!fun8) + auto AdjustTokenPrivileges_f = + AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges")); + if (!AdjustTokenPrivileges_f) return nullptr; // We need SeLockMemoryPrivilege, so try to enable it for the process - if (!fun6( // OpenProcessToken() + if (!OpenProcessToken_f( // OpenProcessToken() GetCurrentProcess(), 
TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) return nullptr; - if (fun7( // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid) - nullptr, "SeLockMemoryPrivilege", &luid)) + if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid)) { TOKEN_PRIVILEGES tp{}; TOKEN_PRIVILEGES prevTp{}; @@ -516,8 +512,8 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, // we still need to query GetLastError() to ensure that the privileges were actually obtained. - if (fun8( // AdjustTokenPrivileges() - hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) + if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, + &prevTpLen) && GetLastError() == ERROR_SUCCESS) { // Round up size to full pages and allocate @@ -526,8 +522,7 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize PAGE_READWRITE); // Privilege no longer needed, restore previous state - fun8( // AdjustTokenPrivileges () - hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); + AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); } } diff --git a/src/numa.h b/src/numa.h index 03ee1fdf..644f212e 100644 --- a/src/numa.h +++ b/src/numa.h @@ -61,6 +61,7 @@ using SetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT) // https://learn.microsoft.com/en-us/windows/win32/api/processtopologyapi/nf-processtopologyapi-setthreadgroupaffinity using SetThreadGroupAffinity_t = BOOL (*)(HANDLE, const GROUP_AFFINITY*, PGROUP_AFFINITY); +using GetActiveProcessorCount_t = DWORD (*)(WORD); #endif #include "misc.h" @@ -70,8 +71,28 @@ namespace Stockfish { using CpuIndex = size_t; using NumaIndex = size_t; -inline const CpuIndex SYSTEM_THREADS_NB = - std::max(1, std::thread::hardware_concurrency()); +inline CpuIndex get_hardware_concurrency() { + CpuIndex concurrency = 
std::thread::hardware_concurrency(); + + // Get all processors across all processor groups on windows, since ::hardware_concurrency + // only returns the number of processors in the first group, because only these + // are available to std::thread. +#ifdef _WIN64 + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto GetActiveProcessorCount_f = + GetActiveProcessorCount_t((void (*)()) GetProcAddress(k32, "GetActiveProcessorCount")); + + if (GetActiveProcessorCount_f != nullptr) + { + concurrency = GetActiveProcessorCount_f(ALL_PROCESSOR_GROUPS); + } +#endif + + return concurrency; +} + +inline const CpuIndex SYSTEM_THREADS_NB = std::max(1, get_hardware_concurrency()); + // We want to abstract the purpose of storing the numa node index somewhat. // Whoever is using this does not need to know the specifics of the replication From f1bb4164bf481c44e707751aa8a4bb8da20d4fa1 Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Thu, 30 May 2024 12:56:44 +0200 Subject: [PATCH 081/315] Fix process' processor affinity determination on Windows. Specialize and privatize NumaConfig::get_process_affinity. Only enable NUMA capability for 64-bit Windows. Following #5307 and some more testing it was determined that the way affinity was being determined on Windows was incorrect, based on incorrect assumptions about GetNumaProcessorNodeEx. This patch fixes the issue by attempting to retrieve the actual process' processor affinity using Windows API. However one issue persists that is not addressable due to limitations of Windows, and will have to be considered a limitation. If affinities were set using SetThreadAffinityMask instead of SetThreadSelectedCpuSetMasks and GetProcessGroupAffinity returns more than 1 group it is NOT POSSIBLE to determine the affinity programmatically on Windows. In such a case the implementation assumes no affinities are set and will consider all processors available for execution.
closes https://github.com/official-stockfish/Stockfish/pull/5312 No functional change --- src/numa.h | 260 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 167 insertions(+), 93 deletions(-) diff --git a/src/numa.h b/src/numa.h index 644f212e..3c9c823a 100644 --- a/src/numa.h +++ b/src/numa.h @@ -41,7 +41,7 @@ #define _GNU_SOURCE #endif #include -#elif defined(_WIN32) +#elif defined(_WIN64) // On Windows each processor group can have up to 64 processors. // https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups @@ -61,7 +61,18 @@ using SetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT) // https://learn.microsoft.com/en-us/windows/win32/api/processtopologyapi/nf-processtopologyapi-setthreadgroupaffinity using SetThreadGroupAffinity_t = BOOL (*)(HANDLE, const GROUP_AFFINITY*, PGROUP_AFFINITY); +// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getthreadselectedcpusetmasks +using GetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT, PUSHORT); + +// https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-getprocessaffinitymask +using GetProcessAffinityMask_t = BOOL (*)(HANDLE, PDWORD_PTR, PDWORD_PTR); + +// https://learn.microsoft.com/en-us/windows/win32/api/processtopologyapi/nf-processtopologyapi-getprocessgroupaffinity +using GetProcessGroupAffinity_t = BOOL (*)(HANDLE, PUSHORT, PUSHORT); + +// https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-getactiveprocessorcount using GetActiveProcessorCount_t = DWORD (*)(WORD); + #endif #include "misc.h" @@ -115,8 +126,6 @@ class NumaReplicatedAccessToken { // in a way that doesn't require recreating it completely, and it would be complex and expensive // to maintain class invariants. // The CPU (processor) numbers always correspond to the actual numbering used by the system. 
-// NOTE: the numbering is only valid within the process, as for example on Windows -// every process gets a "virtualized" set of processors that respects the current affinity // The NUMA node numbers MAY NOT correspond to the system's numbering of the NUMA nodes. // In particular, empty nodes may be removed, or the user may create custom nodes. // It is guaranteed that NUMA nodes are NOT empty, i.e. every node exposed by NumaConfig @@ -133,92 +142,21 @@ class NumaConfig { add_cpu_range_to_node(NumaIndex{0}, CpuIndex{0}, numCpus - 1); } - static std::set get_process_affinity() { - std::set cpus; - - // For unsupported systems, or in case of a soft error, we may assume all processors - // are available for use. - [[maybe_unused]] auto set_to_all_cpus = [&]() { - for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) - cpus.insert(c); - }; - -#if defined(__linux__) && !defined(__ANDROID__) - - // cpu_set_t by default holds 1024 entries. This may not be enough soon, - // but there is no easy way to determine how many threads there actually is. - // In this case we just choose a reasonable upper bound. - static constexpr CpuIndex MaxNumCpus = 1024 * 64; - - cpu_set_t* mask = CPU_ALLOC(MaxNumCpus); - if (mask == nullptr) - std::exit(EXIT_FAILURE); - - const size_t masksize = CPU_ALLOC_SIZE(MaxNumCpus); - - CPU_ZERO_S(masksize, mask); - - const int status = sched_getaffinity(0, masksize, mask); - - if (status != 0) - { - CPU_FREE(mask); - std::exit(EXIT_FAILURE); - } - - for (CpuIndex c = 0; c < MaxNumCpus; ++c) - if (CPU_ISSET_S(c, masksize, mask)) - cpus.insert(c); - - CPU_FREE(mask); - -#elif defined(_WIN32) - - // Windows is problematic and weird due to multiple ways of setting affinity, processor groups, - // and behaviour changes between versions. It's unclear if we can support this feature - // on Windows in the same way we do on Linux. 
- // Apparently when affinity is set via either start /affinity or msys2 taskset - // the function GetNumaProcessorNodeEx completely disregards the processors that we do not - // have affinity more. Moreover, the indices are shifted to start from 0, indicating that Windows - // is providing a whole new mapping of processors to this process. This is problematic in some cases - // but it at least allows us to [probably] support this affinity restriction feature by default. - // So overall, Windows appears to "virtualize" a set of processors and processor groups for every - // process. It's unclear if this assignment can change while the process is running. - // std::thread::hardware_concurrency() returns the number of processors that's consistent - // with GetNumaProcessorNodeEx, so we can just add all of them. - - set_to_all_cpus(); - -#else - - // For other systems we assume the process is allowed to execute on all processors. - set_to_all_cpus(); - -#endif - - return cpus; - } - // This function queries the system for the mapping of processors to NUMA nodes. // On Linux we utilize `lscpu` to avoid libnuma. 
- // On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see - // comment for Windows implementation of get_process_affinity - static NumaConfig from_system(bool respectProcessAffinity = true) { + static NumaConfig from_system([[maybe_unused]] bool respectProcessAffinity = true) { NumaConfig cfg = empty(); +#if defined(__linux__) && !defined(__ANDROID__) + std::set allowedCpus; if (respectProcessAffinity) allowedCpus = get_process_affinity(); - else - { - for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) - allowedCpus.insert(c); - } - auto is_cpu_allowed = [&](CpuIndex c) { return allowedCpus.count(c) == 1; }; - -#if defined(__linux__) && !defined(__ANDROID__) + auto is_cpu_allowed = [respectProcessAffinity, &allowedCpus](CpuIndex c) { + return !respectProcessAffinity || allowedCpus.count(c) == 1; + }; // On Linux things are straightforward, since there's no processor groups and // any thread can be scheduled on all processors. @@ -270,7 +208,19 @@ class NumaConfig { cfg.add_cpu_to_node(NumaIndex{0}, c); } -#elif defined(_WIN32) +#elif defined(_WIN64) + + std::optional> allowedCpus; + + if (respectProcessAffinity) + allowedCpus = get_process_affinity(); + + // The affinity can't be determined in all cases on Windows, but we at least guarantee + // that the number of allowed processors is >= number of processors in the affinity mask. + // In case the user is not satisfied they must set the processor numbers explicitly. + auto is_cpu_allowed = [&allowedCpus](CpuIndex c) { + return !allowedCpus.has_value() || allowedCpus->count(c) == 1; + }; // Since Windows 11 and Windows Server 2022 thread affinities can span // processor groups and can be set as such by a new WinAPI function. 
@@ -292,14 +242,6 @@ class NumaConfig { procnum.Reserved = 0; USHORT nodeNumber; - // When start /affinity or taskset was used to run this process with restricted affinity - // GetNumaProcessorNodeEx will NOT correspond to the system's processor setup, instead - // it appears to follow a completely new processor assignment, made specifically for this process, - // in which processors that this process has affinity for are remapped, and only those are remapped, - // to form a new set of processors. In other words, we can only get processors - // which we have affinity for this way. This means that the behaviour for - // `respectProcessAffinity == false` may be unexpected when affinity is set from outside, - // while the behaviour for `respectProcessAffinity == true` is given by default. const BOOL status = GetNumaProcessorNodeEx(&procnum, &nodeNumber); const CpuIndex c = static_cast(procGroup) * WIN_PROCESSOR_GROUP_SIZE + static_cast(number); @@ -347,8 +289,7 @@ class NumaConfig { // Fallback for unsupported systems. for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) - if (is_cpu_allowed(c)) - cfg.add_cpu_to_node(NumaIndex{0}, c); + cfg.add_cpu_to_node(NumaIndex{0}, c); #endif @@ -573,7 +514,7 @@ class NumaConfig { // This is defensive, allowed because this code is not performance critical. sched_yield(); -#elif defined(_WIN32) +#elif defined(_WIN64) // Requires Windows 11. No good way to set thread affinity spanning processor groups before that. HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); @@ -627,9 +568,9 @@ class NumaConfig { // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support GROUP_AFFINITY affinity; std::memset(&affinity, 0, sizeof(GROUP_AFFINITY)); - affinity.Group = static_cast(n); // We use an ordered set so we're guaranteed to get the smallest cpu number here. 
const size_t forcedProcGroupIndex = *(nodes[n].begin()) / WIN_PROCESSOR_GROUP_SIZE; + affinity.Group = static_cast(forcedProcGroupIndex); for (CpuIndex c : nodes[n]) { const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; @@ -733,6 +674,139 @@ class NumaConfig { return true; } + + +#if defined(__linux__) && !defined(__ANDROID__) + + static std::set get_process_affinity() { + + std::set cpus; + + // For unsupported systems, or in case of a soft error, we may assume all processors + // are available for use. + [[maybe_unused]] auto set_to_all_cpus = [&]() { + for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) + cpus.insert(c); + }; + + // cpu_set_t by default holds 1024 entries. This may not be enough soon, + // but there is no easy way to determine how many threads there actually is. + // In this case we just choose a reasonable upper bound. + static constexpr CpuIndex MaxNumCpus = 1024 * 64; + + cpu_set_t* mask = CPU_ALLOC(MaxNumCpus); + if (mask == nullptr) + std::exit(EXIT_FAILURE); + + const size_t masksize = CPU_ALLOC_SIZE(MaxNumCpus); + + CPU_ZERO_S(masksize, mask); + + const int status = sched_getaffinity(0, masksize, mask); + + if (status != 0) + { + CPU_FREE(mask); + std::exit(EXIT_FAILURE); + } + + for (CpuIndex c = 0; c < MaxNumCpus; ++c) + if (CPU_ISSET_S(c, masksize, mask)) + cpus.insert(c); + + CPU_FREE(mask); + + return cpus; + } + +#elif defined(_WIN64) + + // On Windows there are two ways to set affinity, and therefore 2 ways to get it. + // These are not consistent, so we have to check both. + // In some cases it is actually not possible to determine affinity. + // For example when two different threads have affinity on different processor groups, + // set using SetThreadAffinityMask, we can't retrieve the actual affinities. + // From documentation on GetProcessAffinityMask: + // > If the calling process contains threads in multiple groups, + // > the function returns zero for both affinity masks. 
+ // In such cases we just give up and assume we have affinity for all processors. + // nullopt means no affinity is set, that is, all processors are allowed + static std::optional> get_process_affinity() { + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto GetThreadSelectedCpuSetMasks_f = GetThreadSelectedCpuSetMasks_t( + (void (*)()) GetProcAddress(k32, "GetThreadSelectedCpuSetMasks")); + auto GetProcessAffinityMask_f = + GetProcessAffinityMask_t((void (*)()) GetProcAddress(k32, "GetProcessAffinityMask")); + auto GetProcessGroupAffinity_f = + GetProcessGroupAffinity_t((void (*)()) GetProcAddress(k32, "GetProcessGroupAffinity")); + + if (GetThreadSelectedCpuSetMasks_f != nullptr) + { + std::set cpus; + + USHORT RequiredMaskCount; + GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), nullptr, 0, &RequiredMaskCount); + + // If RequiredMaskCount then these affinities were never set, but it's not consistent + // so GetProcessAffinityMask may still return some affinity. + if (RequiredMaskCount > 0) + { + auto groupAffinities = std::make_unique(RequiredMaskCount); + + GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(), + RequiredMaskCount, &RequiredMaskCount); + + for (USHORT i = 0; i < RequiredMaskCount; ++i) + { + const size_t procGroupIndex = groupAffinities[i].Group; + + for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) + { + if (groupAffinities[i].Mask & (KAFFINITY(1) << j)) + cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); + } + } + + return cpus; + } + } + + if (GetProcessAffinityMask_f != nullptr && GetProcessGroupAffinity_f != nullptr) + { + std::set cpus; + + DWORD_PTR proc, sys; + BOOL status = GetProcessAffinityMask_f(GetCurrentProcess(), &proc, &sys); + if (status == 0) + return std::nullopt; + + // We can't determine affinity because it spans processor groups. + if (proc == 0) + return std::nullopt; + + // We are expecting a single group. 
+ USHORT GroupCount = 1; + USHORT GroupArray[1]; + status = GetProcessGroupAffinity_f(GetCurrentProcess(), &GroupCount, GroupArray); + if (status == 0 || GroupCount != 1) + return std::nullopt; + + const size_t procGroupIndex = GroupArray[0]; + + uint64_t mask = static_cast(proc); + for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) + { + if (mask & (KAFFINITY(1) << j)) + cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); + } + + return cpus; + } + + return std::nullopt; + } + +#endif }; class NumaReplicationContext; From 86694b5914c63ee5b0f964108cbd7eacca14c93a Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Thu, 30 May 2024 18:18:51 +0200 Subject: [PATCH 082/315] Replace std::from_chars with std::stoull the former was not widely supported, requiring newer compiler versions. closes https://github.com/official-stockfish/Stockfish/pull/5313 No functional change --- src/misc.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index a45becf5..7a447329 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -42,16 +42,15 @@ using AdjustTokenPrivileges_t = #endif #include -#include #include #include #include #include #include +#include #include #include #include -#include #include "types.h" @@ -598,13 +597,10 @@ void aligned_large_pages_free(void* mem) { std_aligned_free(mem); } #endif size_t str_to_size_t(const std::string& s) { - size_t value; - auto result = std::from_chars(s.data(), s.data() + s.size(), value); - - if (result.ec != std::errc()) + unsigned long long value = std::stoull(s); + if (value > std::numeric_limits::max()) std::exit(EXIT_FAILURE); - - return value; + return static_cast(value); } std::string CommandLine::get_binary_directory(std::string argv0) { From c8375c2fbd398f07b8488ae2d1b12fa1251fb69f Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Thu, 30 May 2024 17:22:53 +0200 Subject: [PATCH 083/315] On linux use sysfs instead of lscpu Use sysfs 
(https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node) to determine processor to NUMA node mapping. Avoids problems on some machines with high core count where lscpu was showing high cpu utilization. closes https://github.com/official-stockfish/Stockfish/pull/5315 No functional change --- src/misc.cpp | 13 +++++ src/misc.h | 24 ++++---- src/numa.h | 154 ++++++++++++++++++++++++++++----------------------- 3 files changed, 107 insertions(+), 84 deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index 7a447329..aa22e61f 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -42,12 +42,14 @@ using AdjustTokenPrivileges_t = #endif #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -603,6 +605,17 @@ size_t str_to_size_t(const std::string& s) { return static_cast(value); } +std::optional read_file_to_string(const std::string& path) { + std::ifstream f(path, std::ios_base::binary); + if (!f) + return std::nullopt; + return std::string(std::istreambuf_iterator(f), std::istreambuf_iterator()); +} + +void remove_whitespace(std::string& s) { + s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return std::isspace(c); }), s.end()); +} + std::string CommandLine::get_binary_directory(std::string argv0) { std::string pathSeparator; diff --git a/src/misc.h b/src/misc.h index ec7f7b76..5c0bde44 100644 --- a/src/misc.h +++ b/src/misc.h @@ -88,21 +88,12 @@ struct PipeDeleter { } }; -inline std::optional get_system_command_output(const std::string& command) { - std::unique_ptr pipe(popen(command.c_str(), "r")); - if (!pipe) - return std::nullopt; - - std::string result; - char buffer[1024]; - while (fgets(buffer, sizeof(buffer), pipe.get()) != nullptr) - result += buffer; - - return result; -} - #endif +// Reads the file as bytes. +// Returns std::nullopt if the file does not exist. 
+std::optional read_file_to_string(const std::string& path); + void dbg_hit_on(bool cond, int slot = 0); void dbg_mean_of(int64_t value, int slot = 0); void dbg_stdev_of(int64_t value, int slot = 0); @@ -118,9 +109,12 @@ inline TimePoint now() { } inline std::vector split(const std::string& s, const std::string& delimiter) { - size_t begin = 0; std::vector res; + if (s.empty()) + return res; + + size_t begin = 0; for (;;) { const size_t end = s.find(delimiter, begin); @@ -136,6 +130,8 @@ inline std::vector split(const std::string& s, const std::string& d return res; } +void remove_whitespace(std::string& s); + enum SyncCout { IO_LOCK, IO_UNLOCK diff --git a/src/numa.h b/src/numa.h index 3c9c823a..0553309a 100644 --- a/src/numa.h +++ b/src/numa.h @@ -33,9 +33,8 @@ #include #include -// We support linux very well, but we explicitly do NOT support Android, partially because -// there are potential issues with `lscpu`, `popen` availability, and partially because -// there's no NUMA environments running Android and there probably won't be. +// We support linux very well, but we explicitly do NOT support Android, because there's +// no affected systems, not worth maintaining. #if defined(__linux__) && !defined(__ANDROID__) #if !defined(_GNU_SOURCE) #define _GNU_SOURCE @@ -143,7 +142,9 @@ class NumaConfig { } // This function queries the system for the mapping of processors to NUMA nodes. - // On Linux we utilize `lscpu` to avoid libnuma. + // On Linux we read from standardized kernel sysfs, with a fallback to single NUMA node. + // On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see + // comment for Windows implementation of get_process_affinity static NumaConfig from_system([[maybe_unused]] bool respectProcessAffinity = true) { NumaConfig cfg = empty(); @@ -160,48 +161,52 @@ class NumaConfig { // On Linux things are straightforward, since there's no processor groups and // any thread can be scheduled on all processors. 
- // This command produces output in the following form - // CPU NODE - // 0 0 - // 1 0 - // 2 1 - // 3 1 - // - // On some systems it may use '-' to signify no NUMA node, in which case we assume it's in node 0. - auto lscpuOpt = get_system_command_output("lscpu -e=cpu,node"); - if (lscpuOpt.has_value()) + + // We try to gather this information from the sysfs first + // https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node + + bool useFallback = false; + auto fallback = [&]() { + useFallback = true; + cfg = empty(); + }; + + // /sys/devices/system/node/online contains information about active NUMA nodes + auto nodeIdsStr = read_file_to_string("/sys/devices/system/node/online"); + if (!nodeIdsStr.has_value() || nodeIdsStr->empty()) { - - std::istringstream ss(*lscpuOpt); - - // skip the list header - ss.ignore(std::numeric_limits::max(), '\n'); - - while (true) - { - CpuIndex c; - NumaIndex n; - - ss >> c; - - if (!ss) - break; - - ss >> n; - - if (!ss) - { - ss.clear(); - std::string dummy; - ss >> dummy; - n = 0; - } - - if (is_cpu_allowed(c)) - cfg.add_cpu_to_node(n, c); - } + fallback(); } else + { + remove_whitespace(*nodeIdsStr); + for (size_t n : indices_from_shortened_string(*nodeIdsStr)) + { + // /sys/devices/system/node/node.../cpulist + std::string path = + std::string("/sys/devices/system/node/node") + std::to_string(n) + "/cpulist"; + auto cpuIdsStr = read_file_to_string(path); + // Now, we only bail if the file does not exist. Some nodes may be empty, that's fine. + // An empty node still has a file that appears to have some whitespace, so we need + // to handle that. 
+ if (!cpuIdsStr.has_value()) + { + fallback(); + break; + } + else + { + remove_whitespace(*cpuIdsStr); + for (size_t c : indices_from_shortened_string(*cpuIdsStr)) + { + if (is_cpu_allowed(c)) + cfg.add_cpu_to_node(n, c); + } + } + } + } + + if (useFallback) { for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) if (is_cpu_allowed(c)) @@ -309,38 +314,17 @@ class NumaConfig { NumaIndex n = 0; for (auto&& nodeStr : split(s, ":")) { - bool addedAnyCpuInThisNode = false; - - for (const std::string& cpuStr : split(nodeStr, ",")) + auto indices = indices_from_shortened_string(nodeStr); + if (!indices.empty()) { - if (cpuStr.empty()) - continue; - - auto parts = split(cpuStr, "-"); - if (parts.size() == 1) + for (auto idx : indices) { - const CpuIndex c = CpuIndex{str_to_size_t(parts[0])}; - if (!cfg.add_cpu_to_node(n, c)) + if (!cfg.add_cpu_to_node(n, CpuIndex(idx))) std::exit(EXIT_FAILURE); } - else if (parts.size() == 2) - { - const CpuIndex cfirst = CpuIndex{str_to_size_t(parts[0])}; - const CpuIndex clast = CpuIndex{str_to_size_t(parts[1])}; - if (!cfg.add_cpu_range_to_node(n, cfirst, clast)) - std::exit(EXIT_FAILURE); - } - else - { - std::exit(EXIT_FAILURE); - } - - addedAnyCpuInThisNode = true; - } - - if (addedAnyCpuInThisNode) n += 1; + } } cfg.customAffinity = true; @@ -675,7 +659,6 @@ class NumaConfig { return true; } - #if defined(__linux__) && !defined(__ANDROID__) static std::set get_process_affinity() { @@ -807,6 +790,37 @@ class NumaConfig { } #endif + + static std::vector indices_from_shortened_string(const std::string& s) { + std::vector indices; + + if (s.empty()) + return indices; + + for (const std::string& ss : split(s, ",")) + { + if (ss.empty()) + continue; + + auto parts = split(ss, "-"); + if (parts.size() == 1) + { + const CpuIndex c = CpuIndex{str_to_size_t(parts[0])}; + indices.emplace_back(c); + } + else if (parts.size() == 2) + { + const CpuIndex cfirst = CpuIndex{str_to_size_t(parts[0])}; + const CpuIndex clast = 
CpuIndex{str_to_size_t(parts[1])}; + for (size_t c = cfirst; c <= clast; ++c) + { + indices.emplace_back(c); + } + } + } + + return indices; + } }; class NumaReplicationContext; From 54e74919d478def20cb103d1e9677a696073c92f Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Thu, 30 May 2024 21:42:48 +0200 Subject: [PATCH 084/315] Fix cross from Linux to Windows specifies Windows 7 required https://learn.microsoft.com/en-us/cpp/porting/modifying-winver-and-win32-winnt?view=msvc-170 closes https://github.com/official-stockfish/Stockfish/pull/5319 No functional change --- src/numa.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/numa.h b/src/numa.h index 0553309a..ee84e1cf 100644 --- a/src/numa.h +++ b/src/numa.h @@ -42,6 +42,11 @@ #include #elif defined(_WIN64) + #if _WIN32_WINNT < 0x0601 + #undef _WIN32_WINNT + #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes + #endif + // On Windows each processor group can have up to 64 processors. // https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups static constexpr size_t WIN_PROCESSOR_GROUP_SIZE = 64; From de1ae4949daf2c6d36c50e51c132cee808e2ade0 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Fri, 31 May 2024 04:01:02 +0300 Subject: [PATCH 085/315] Tweak first picked move (ttMove) reduction rule Tweak first picked move (ttMove) reduction rule: Instead of always resetting the reduction to 0, we now only do so if the current reduction is less than 2. If the current reduction is 2 or more, we decrease it by 2 instead. 
Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 109504 W: 28340 L: 27919 D: 53245 Ptnml(0-2): 305, 12848, 28028, 13263, 308 https://tests.stockfishchess.org/tests/view/6658c2fa6b0e318cefa900c2 Passed LTC: LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 130410 W: 33248 L: 32738 D: 64424 Ptnml(0-2): 53, 14139, 36328, 14615, 70 https://tests.stockfishchess.org/tests/view/6658dd8a6b0e318cefa90173 closes https://github.com/official-stockfish/Stockfish/pull/5321 bench: 1224588 --- src/search.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 638af546..4086d50f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1149,10 +1149,10 @@ moves_loop: // When in check, search starts here if ((ss + 1)->cutoffCnt > 3) r++; - // Set reduction to 0 for first picked move (ttMove) (~2 Elo) - // Nullifies all previous reduction adjustments to ttMove and leaves only history to do them + // For first picked move (ttMove) reduce reduction + // but never allow it to go below 0 (~3 Elo) else if (move == ttMove) - r = 0; + r = std::max(0, r - 2); ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] From 0ef809ac71702ee496a88f2cf305117511b555b2 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Wed, 29 May 2024 13:56:15 -0400 Subject: [PATCH 086/315] Quadratic smallnet threshold with re-evaluation The threshold now decreases more quickly as pawn count decreases, using the smallnet more compared to before. 
Combo of two eval patches: https://tests.stockfishchess.org/tests/view/66576c5f6b0e318cefa8d26e https://tests.stockfishchess.org/tests/view/664ced40830eb9f886616a77 Passed STC: https://tests.stockfishchess.org/tests/view/66588c136b0e318cefa8ff21 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 112608 W: 29336 L: 28908 D: 54364 Ptnml(0-2): 344, 13223, 28718, 13699, 320 Passed LTC: https://tests.stockfishchess.org/tests/view/6658c8786b0e318cefa900f5 LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 108288 W: 27493 L: 27026 D: 53769 Ptnml(0-2): 54, 11821, 29930, 12282, 57 closes https://github.com/official-stockfish/Stockfish/pull/5323 bench 1728074 --- src/evaluate.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 666697dd..35bc9301 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -46,7 +46,8 @@ int Eval::simple_eval(const Position& pos, Color c) { bool Eval::use_smallnet(const Position& pos) { int simpleEval = simple_eval(pos, pos.side_to_move()); - return std::abs(simpleEval) > 992 + 6 * pos.count(); + int pawnCount = pos.count(); + return std::abs(simpleEval) > 992 + 6 * pawnCount * pawnCount / 16; } // Evaluate is the evaluator for the outer world. 
It returns a static evaluation @@ -67,7 +68,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); // Re-evaluate the position when higher eval accuracy is worth the time spent - if (smallNet && nnue * simpleEval < 0) + if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 250)) { nnue = networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); smallNet = false; From b34a690cd4aa6d828ae0f47b427167f4e6392db7 Mon Sep 17 00:00:00 2001 From: rn5f107s2 Date: Thu, 30 May 2024 21:18:42 +0200 Subject: [PATCH 087/315] MCP more after a bad singular search The idea is, that if we have the information that the singular search failed low and therefore produced an upperbound score, we can use the score from singularsearch as approximate upperbound as to what bestValue our non ttMoves will produce. If this value is well below alpha, we assume that all non-ttMoves will score below alpha and therefore can skip more moves. This patch also sets up variables for future patches wanting to use the singular search result outside of singular extensions, in singularBound and singularValue, meaning further patches using this search result to affect various pruning techniques can be tried. 
Passed STC: https://tests.stockfishchess.org/tests/view/6658d13e6b0e318cefa90120 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 85632 W: 22112 L: 21725 D: 41795 Ptnml(0-2): 243, 10010, 21947, 10349, 267 Passed LTC: https://tests.stockfishchess.org/tests/view/6658dd356b0e318cefa9016a LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 243978 W: 62014 L: 61272 D: 120692 Ptnml(0-2): 128, 26598, 67791, 27348, 124 closes https://github.com/official-stockfish/Stockfish/pull/5325 bench 1397172 --- src/search.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 4086d50f..f738530a 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -550,11 +550,12 @@ Value Search::Worker::search( Key posKey; Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; - Value bestValue, value, ttValue, eval, maxValue, probCutBeta; + Value bestValue, value, ttValue, eval, maxValue, probCutBeta, singularValue; bool givesCheck, improving, priorCapture, opponentWorsening; bool capture, moveCountPruning, ttCapture; Piece movedPiece; int moveCount, captureCount, quietCount; + Bound singularBound; // Step 1. Initialize node Worker* thisThread = this; @@ -923,6 +924,8 @@ moves_loop: // When in check, search starts here value = bestValue; moveCountPruning = false; + singularValue = VALUE_INFINITE; + singularBound = BOUND_NONE; // Step 13. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. 
@@ -972,7 +975,9 @@ moves_loop: // When in check, search starts here if (!rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) - moveCountPruning = moveCount >= futility_move_count(improving, depth); + moveCountPruning = + moveCount >= futility_move_count(improving, depth) + - (singularBound == BOUND_UPPER && singularValue < alpha - 50); // Reduced depth of the next LMR search int lmrDepth = newDepth - r; @@ -1058,8 +1063,9 @@ moves_loop: // When in check, search starts here Depth singularDepth = newDepth / 2; ss->excludedMove = move; - value = + value = singularValue = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); + singularBound = singularValue >= singularBeta ? BOUND_LOWER : BOUND_UPPER; ss->excludedMove = Move::none(); if (value < singularBeta) From cb4a62311985f685ba6f5457851527a3289073e6 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Mon, 27 May 2024 10:40:25 -0400 Subject: [PATCH 088/315] Update default smallnet to nn-37f18f62d772.nnue Created by training L1-128 from scratch with: - skipping based on simple eval in the trainer, for compatibility with regular binpacks without requiring pre-filtering all binpacks - minimum simple eval of 950, lower than 1000 previously - usage of some hse-v1 binpacks with minimum simple eval 1000 - addition of hse-v6 binpacks with minimum simple eval 500 - permuting the FT with 10k positions from fishpack32.binpack - torch.compile to speed up smallnet training Training is significantly slower when using non-pre-filtered binpacks due to the increased skipping required. This net was reached at epoch 339. 
``` experiment-name: 128--S1-hse-1k-T80-v6-unfilt-less-sf--se-gt950-no-wld-skip training-dataset: /data/: - dfrc99-16tb7p.v2.min.binpack /data/hse-v1/: - leela96-filt-v2.min.high-simple-eval-1k.min-v2.binpack - test60-novdec2021-12tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.min-v2.binpack - test77-nov2021-2tb7p.no-db.min.high-simple-eval-1k.min-v2.binpack - test77-dec2021-16tb7p.no-db.min.high-simple-eval-1k.min-v2.binpack - test77-jan2022-2tb7p.high-simple-eval-1k.min-v2.binpack - test78-jantomay2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.min-v2.binpack - test78-juntosep2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.min-v2.binpack - test79-apr2022-16tb7p.min.high-simple-eval-1k.min-v2.binpack - test79-may2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.min-v2.binpack - test80-apr2022-16tb7p.min.high-simple-eval-1k.min-v2.binpack - test80-may2022-16tb7p.high-simple-eval-1k.min-v2.binpack - test80-jun2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.min-v2.binpack - test80-jul2022-16tb7p.v6-dd.min.high-simple-eval-1k.min-v2.binpack - test80-sep2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.min-v2.binpack - test80-nov2022-16tb7p-v6-dd.min.high-simple-eval-1k.min-v2.binpack /data/S11-mar2024/: - test80-2022-08-aug-16tb7p.v6-dd.min.binpack - test80-2022-10-oct-16tb7p.v6-dd.binpack - test80-2022-12-dec-16tb7p.min.binpack - test80-2023-01-jan-16tb7p.v6-sk20.min.binpack - test80-2023-02-feb-16tb7p.v6-sk20.min.binpack - test80-2023-03-mar-2tb7p.v6-sk16.min.binpack - test80-2023-04-apr-2tb7p.v6-sk16.min.binpack - test80-2023-05-may-2tb7p.v6.min.binpack - test80-2023-06-jun-2tb7p.binpack.min-v2.binpack - test80-2023-07-jul-2tb7p.binpack.min-v2.binpack - test80-2023-08-aug-2tb7p.v6.min.binpack - test80-2023-09-sep-2tb7p.binpack.hse-v6.binpack - test80-2023-10-oct-2tb7p.binpack.hse-v6.binpack - test80-2023-11-nov-2tb7p.binpack.hse-v6.binpack - 
test80-2023-12-dec-2tb7p.binpack.hse-v6.binpack - test80-2024-01-jan-2tb7p.binpack.hse-v6.binpack - test80-2024-02-feb-2tb7p.binpack.hse-v6.binpack - test80-2024-03-mar-2tb7p.binpack wld-fen-skipping: False nnue-pytorch-branch: linrock/nnue-pytorch/128-skipSimpleEval-lt950-torch-compile engine-test-branch: linrock/Stockfish/L1-128-nolazy engine-base-branch: linrock/Stockfish/L1-128 start-from-engine-test-net: False num-epochs: 500 start-lambda: 1.0 end-lambda: 1.0 ``` Training data can be found at: https://robotmoon.com/nnue-training-data/ Passed STC: https://tests.stockfishchess.org/tests/view/66549c16a86388d5e27daff5 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 196608 W: 51254 L: 50697 D: 94657 Ptnml(0-2): 722, 23244, 49796, 23839, 703 Passed LTC: https://tests.stockfishchess.org/tests/view/6658d1aa6b0e318cefa90122 LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 122538 W: 31332 L: 30835 D: 60371 Ptnml(0-2): 69, 13407, 33811, 13922, 60 closes https://github.com/official-stockfish/Stockfish/pull/5333 bench --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index 4fab1a00..bdef9ceb 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -34,7 +34,7 @@ namespace Eval { // name of the macro or the location where this macro is defined, as it is used // in the Makefile/Fishtest. #define EvalFileDefaultNameBig "nn-ddcfb9224cdb.nnue" -#define EvalFileDefaultNameSmall "nn-baff1ede1f90.nnue" +#define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue" namespace NNUE { struct Networks; From 783dfc2eb235236ff799618436d68d0c1a3f3807 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sat, 1 Jun 2024 20:44:06 +0300 Subject: [PATCH 089/315] Adjust return bonus from tt cutoffs at fail highs This is a reintroduction of the recently simplified logic - if positive tt cutoff occurs return not a tt value but something between it and beta. 
Difference is that instead of static linear combination there we use basically the same formula as we do in the main search - with the only difference being using tt depth instead of depth, which makes a lot of sense. Passed STC: https://tests.stockfishchess.org/tests/view/665b3a34f4a1fd0c208ea870 LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 54944 W: 14239 L: 13896 D: 26809 Ptnml(0-2): 151, 6407, 14008, 6760, 146 Passed LTC: https://tests.stockfishchess.org/tests/view/665b520011645bd3d3fac341 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 90540 W: 23070 L: 22640 D: 44830 Ptnml(0-2): 39, 9903, 24965, 10315, 48 closes https://github.com/official-stockfish/Stockfish/pull/5336 bench 1381237 --- src/search.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index f738530a..514b7b7d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -640,7 +640,12 @@ Value Search::Worker::search( // Partial workaround for the graph history interaction problem // For high rule50 counts don't produce transposition table cutoffs. if (pos.rule50_count() < 90) + { + if (ttValue >= beta && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY + && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) + ttValue = (ttValue * tte->depth() + beta) / (tte->depth() + 1); return ttValue; + } } // Step 5. Tablebases probe From b0870cf528ef90e8873719a36a448dafd73e3aee Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sat, 1 Jun 2024 15:13:41 +0200 Subject: [PATCH 090/315] Avoid changing bestvalue in the case the ttValue contains mate scores, do not return them as bestValue, since they are not proven. 
passed STC https://tests.stockfishchess.org/tests/view/665b1ea5586058766677cfa3 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 58912 W: 15319 L: 15130 D: 28463 Ptnml(0-2): 141, 6562, 15854, 6765, 134 passed LTC: https://tests.stockfishchess.org/tests/view/665b2712586058766677cfc4 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 141666 W: 35976 L: 35879 D: 69811 Ptnml(0-2): 61, 15513, 39584, 15618, 57 closes https://github.com/official-stockfish/Stockfish/pull/5335 Bench: 1336115 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 514b7b7d..4dc7d330 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1495,7 +1495,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); // ttValue can be used as a better position evaluation (~13 Elo) - if (ttValue != VALUE_NONE + if (std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & (ttValue > bestValue ? 
BOUND_LOWER : BOUND_UPPER))) bestValue = ttValue; } From ec1cda1d819f534c8d0bfc4624836157bc548eb6 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Fri, 31 May 2024 22:29:29 +0300 Subject: [PATCH 091/315] Simplify histories movepick formula Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 81440 W: 21100 L: 20929 D: 39411 Ptnml(0-2): 248, 9659, 20718, 9864, 231 https://tests.stockfishchess.org/tests/view/6659a8b7ea624d64ea5f3208 Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 85758 W: 21763 L: 21607 D: 42388 Ptnml(0-2): 34, 9606, 23463, 9722, 54 https://tests.stockfishchess.org/tests/view/6659d7bff426908fcc6b692c closes https://github.com/official-stockfish/Stockfish/pull/5326 bench: 1280472 --- src/movepick.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index b6828a30..d3335907 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -178,7 +178,7 @@ void MovePicker::score() { Square to = m.to_sq(); // histories - m.value = 2 * (*mainHistory)[pos.side_to_move()][m.from_to()]; + m.value = (*mainHistory)[pos.side_to_move()][m.from_to()]; m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to]; m.value += 2 * (*continuationHistory[0])[pc][to]; m.value += (*continuationHistory[1])[pc][to]; From 180cab443896a6a37a3c39852ff124ce856987d2 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Sat, 1 Jun 2024 06:11:51 +0900 Subject: [PATCH 092/315] Simplify 50 move rule dampening Refactor the logic of 50 move rule dampening by removing a constant. 
Passed non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 35232 W: 9214 L: 8992 D: 17026 Ptnml(0-2): 114, 4081, 8999, 4313, 109 https://tests.stockfishchess.org/tests/view/665a329013d08af3c1725610 Passed non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 38406 W: 9732 L: 9530 D: 19144 Ptnml(0-2): 14, 4132, 10708, 4336, 13 https://tests.stockfishchess.org/tests/view/665a370913d08af3c1725651 https://github.com/official-stockfish/Stockfish/pull/5327 Bench: 1059739 --- src/evaluate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 35bc9301..eaf7ab5f 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -81,10 +81,10 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, int material = 300 * pos.count() + 350 * pos.count() + 400 * pos.count() + 640 * pos.count() + 1200 * pos.count(); - v = (nnue * (34300 + material) + optimism * (4400 + material)) / 35967; + v = (nnue * (34300 + material) + optimism * (4400 + material)) / 36672; // Damp down the evaluation linearly when shuffling - v = v * (204 - pos.rule50_count()) / 208; + v -= v * pos.rule50_count() / 212; // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); From b009c43254c3483dd356e28b5b66ba62a724aa1d Mon Sep 17 00:00:00 2001 From: xoto10 <23479932+xoto10@users.noreply.github.com> Date: Sat, 1 Jun 2024 17:10:06 +0100 Subject: [PATCH 093/315] Simplify tm, removing faster 1st move and 1.13 extraTime. 
Passed STC 10+0.1 : LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 349760 W: 90112 L: 90231 D: 169417 Ptnml(0-2): 784, 37970, 97496, 37841, 789 https://tests.stockfishchess.org/tests/view/665aeee00223e235f05b7d21 Passed LTC 60+0.6 : LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 140082 W: 35463 L: 35370 D: 69249 Ptnml(0-2): 59, 13492, 42851, 13575, 64 https://tests.stockfishchess.org/tests/view/665b15e78da109e362924e5a closes https://github.com/official-stockfish/Stockfish/pull/5334 No functional change --- src/search.cpp | 3 +-- src/search.h | 1 - src/thread.cpp | 7 ++++--- src/timeman.cpp | 18 ++++-------------- src/timeman.h | 1 - 5 files changed, 9 insertions(+), 21 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 4dc7d330..35de756f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -160,8 +160,7 @@ void Search::Worker::start_searching() { return; } - main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options, - main_manager()->originalPly, main_manager()->originalTimeAdjust); + main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options, main_manager()->originalTimeAdjust); tt.new_search(); if (rootMoves.empty()) diff --git a/src/search.h b/src/search.h index 7cff10d5..01f7b8bd 100644 --- a/src/search.h +++ b/src/search.h @@ -209,7 +209,6 @@ class SearchManager: public ISearchManager { Stockfish::TimeManagement tm; double originalTimeAdjust; - int originalPly; int callsCnt; std::atomic_bool ponder; diff --git a/src/thread.cpp b/src/thread.cpp index 71134ead..1b0fffc3 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -213,12 +213,13 @@ void ThreadPool::clear() { for (auto&& th : threads) th->wait_for_search_finished(); + // These two affect the time taken on the first move of a game: + main_manager()->bestPreviousAverageScore = VALUE_INFINITE; + main_manager()->previousTimeReduction = 0.85; + main_manager()->callsCnt = 0; main_manager()->bestPreviousScore = VALUE_INFINITE; - 
main_manager()->bestPreviousAverageScore = VALUE_INFINITE; - main_manager()->originalPly = -1; main_manager()->originalTimeAdjust = -1; - main_manager()->previousTimeReduction = 1.0; main_manager()->tm.clear(); } diff --git a/src/timeman.cpp b/src/timeman.cpp index f6ca298a..9de70fdc 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -48,7 +48,6 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply, const OptionsMap& options, - int& originalPly, double& originalTimeAdjust) { TimePoint npmsec = TimePoint(options["nodestime"]); @@ -60,9 +59,6 @@ void TimeManagement::init(Search::LimitsType& limits, if (limits.time[us] == 0) return; - if (originalPly == -1) - originalPly = ply; - TimePoint moveOverhead = TimePoint(options["Move Overhead"]); // optScale is a percentage of available time to use for the current move. @@ -104,20 +100,14 @@ void TimeManagement::init(Search::LimitsType& limits, TimePoint timeLeft = std::max(TimePoint(1), limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg)); - // Extra time according to timeLeft - if (originalTimeAdjust < 0) - originalTimeAdjust = 0.2078 + 0.1623 * std::log10(timeLeft); - // x basetime (+ z increment) // If there is a healthy increment, timeLeft can exceed the actual available // game time for the current move, so also cap to a percentage of available game time. if (limits.movestogo == 0) { - // Use extra time with larger increments - double optExtra = scaledInc < 500 ? 1.0 : 1.13; - if (ply - originalPly < 2) - optExtra *= 0.95; - optExtra *= originalTimeAdjust; + // Extra time according to timeLeft + if (originalTimeAdjust < 0) + originalTimeAdjust = 0.3285 * std::log10(timeLeft) - 0.4830; // Calculate time constants based on current time left. 
double logTimeInSec = std::log10(scaledTime / 1000.0); @@ -126,7 +116,7 @@ void TimeManagement::init(Search::LimitsType& limits, optScale = std::min(0.0122 + std::pow(ply + 2.95, 0.462) * optConstant, 0.213 * limits.time[us] / timeLeft) - * optExtra; + * originalTimeAdjust; maxScale = std::min(6.64, maxConstant + ply / 12.0); } diff --git a/src/timeman.h b/src/timeman.h index 8b763089..10207a8a 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -40,7 +40,6 @@ class TimeManagement { Color us, int ply, const OptionsMap& options, - int& originalPly, double& originalTimeAdjust); TimePoint optimum() const; From c17d73c554054db8cdc6eb39d667c1dca47d3818 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sat, 1 Jun 2024 11:07:08 -0400 Subject: [PATCH 094/315] Simplify statScore divisor into a constant Passed non-regression STC: https://tests.stockfishchess.org/tests/view/665b392ff4a1fd0c208ea864 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 114752 W: 29628 L: 29495 D: 55629 Ptnml(0-2): 293, 13694, 29269, 13827, 293 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/665b588c11645bd3d3fac467 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 65322 W: 16549 L: 16373 D: 32400 Ptnml(0-2): 30, 7146, 18133, 7322, 30 closes https://github.com/official-stockfish/Stockfish/pull/5337 bench 1241443 --- src/numa.h | 2 +- src/search.cpp | 5 +++-- src/thread.cpp | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/numa.h b/src/numa.h index ee84e1cf..967e24a6 100644 --- a/src/numa.h +++ b/src/numa.h @@ -564,7 +564,7 @@ class NumaConfig { { const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; const size_t idxWithinProcGroup = c % WIN_PROCESSOR_GROUP_SIZE; - // We skip processors that are not in the same proccessor group. + // We skip processors that are not in the same processor group. // If everything was set up correctly this will never be an issue, // but we have to account for bad NUMA node specification. 
if (procGroupIndex != forcedProcGroupIndex) diff --git a/src/search.cpp b/src/search.cpp index 35de756f..84ca93f8 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -160,7 +160,8 @@ void Search::Worker::start_searching() { return; } - main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options, main_manager()->originalTimeAdjust); + main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options, + main_manager()->originalTimeAdjust); tt.new_search(); if (rootMoves.empty()) @@ -1169,7 +1170,7 @@ moves_loop: // When in check, search starts here + (*contHist[1])[movedPiece][move.to_sq()] - 5169; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / (12219 - std::min(depth, 13) * 120); + r -= ss->statScore / 11049; // Step 17. Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1 + rootNode) diff --git a/src/thread.cpp b/src/thread.cpp index 1b0fffc3..a36c2efb 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -217,9 +217,9 @@ void ThreadPool::clear() { main_manager()->bestPreviousAverageScore = VALUE_INFINITE; main_manager()->previousTimeReduction = 0.85; - main_manager()->callsCnt = 0; - main_manager()->bestPreviousScore = VALUE_INFINITE; - main_manager()->originalTimeAdjust = -1; + main_manager()->callsCnt = 0; + main_manager()->bestPreviousScore = VALUE_INFINITE; + main_manager()->originalTimeAdjust = -1; main_manager()->tm.clear(); } From 8aaae0367cfed7ae5da54d330b65d76d4b1b13ae Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sun, 2 Jun 2024 09:18:19 +0200 Subject: [PATCH 095/315] Revert "Adjust return bonus from tt cutoffs at fail highs" This reverts commit 783dfc2eb235236ff799618436d68d0c1a3f3807. could lead to a division by zero for: ttValue = (ttValue * tte->depth() + beta) / (tte->depth() + 1) as other threads can overwrite the tte with a QS depth of -1. 
closes https://github.com/official-stockfish/Stockfish/pull/5338 Bench: 1280020 --- src/search.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 84ca93f8..a2a75af0 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -640,12 +640,7 @@ Value Search::Worker::search( // Partial workaround for the graph history interaction problem // For high rule50 counts don't produce transposition table cutoffs. if (pos.rule50_count() < 90) - { - if (ttValue >= beta && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY - && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) - ttValue = (ttValue * tte->depth() + beta) / (tte->depth() + 1); return ttValue; - } } // Step 5. Tablebases probe From a2a7edf4c8fa145667135bf1bc7f4f67016f7608 Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Sun, 2 Jun 2024 20:39:25 +0200 Subject: [PATCH 096/315] Fix GetProcessGroupAffinity call `GetProcessGroupAffinity` appears to require 4 byte alignment for `GroupArray` memory. See https://stackoverflow.com/q/78567676 for further information closes https://github.com/official-stockfish/Stockfish/pull/5340 No functional change --- src/numa.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/numa.h b/src/numa.h index 967e24a6..5934a0cd 100644 --- a/src/numa.h +++ b/src/numa.h @@ -773,8 +773,8 @@ class NumaConfig { return std::nullopt; // We are expecting a single group. - USHORT GroupCount = 1; - USHORT GroupArray[1]; + USHORT GroupCount = 1; + alignas(4) USHORT GroupArray[1]; status = GetProcessGroupAffinity_f(GetCurrentProcess(), &GroupCount, GroupArray); if (status == 0 || GroupCount != 1) return std::nullopt; From 00a28ae325688346e63a452b2050bd1491085359 Mon Sep 17 00:00:00 2001 From: Disservin Date: Fri, 31 May 2024 10:53:10 +0200 Subject: [PATCH 097/315] Add helpers for managing aligned memory Previously, we had two type aliases, LargePagePtr and AlignedPtr, which required manually initializing the aligned memory for the pointer. 
The new helpers: - make_unique_aligned - make_unique_large_page are now available for allocating aligned memory (with large pages). They behave similarly to std::make_unique, ensuring objects allocated with these functions follow RAII. The old approach had issues with initializing non-trivial types or arrays of objects. The evaluation function of the network is now a unique pointer to an array instead of an array of unique pointers. Memory related functions have been moved into memory.h Passed High Hash Pressure Test Non-Regression STC: https://tests.stockfishchess.org/tests/view/665b2b36586058766677cfd2 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 476992 W: 122426 L: 122677 D: 231889 Ptnml(0-2): 1145, 51027, 134419, 50744, 1161 Failed Normal Non-Regression STC: https://tests.stockfishchess.org/tests/view/665b2997586058766677cfc8 LLR: -2.94 (-2.94,2.94) <-1.75,0.25> Total: 877312 W: 225233 L: 226395 D: 425684 Ptnml(0-2): 2110, 94642, 246239, 93630, 2035 Probably a fluke since there shouldn't be a real slowdown and it has also passed the high hash pressure test. 
closes https://github.com/official-stockfish/Stockfish/pull/5332 No functional change --- src/Makefile | 8 +- src/memory.cpp | 229 +++++++++++++++++++++++++++++++++++++++++++ src/memory.h | 215 ++++++++++++++++++++++++++++++++++++++++ src/misc.cpp | 199 +------------------------------------ src/misc.h | 48 +-------- src/nnue/network.cpp | 77 +++++---------- src/nnue/network.h | 6 +- src/numa.h | 1 + src/thread.h | 4 +- src/tt.cpp | 7 +- src/tt.h | 10 +- 11 files changed, 492 insertions(+), 312 deletions(-) create mode 100644 src/memory.cpp create mode 100644 src/memory.h diff --git a/src/Makefile b/src/Makefile index 5119b615..29c4f879 100644 --- a/src/Makefile +++ b/src/Makefile @@ -55,7 +55,7 @@ PGOBENCH = $(WINE_PATH) ./$(EXE) bench SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \ misc.cpp movegen.cpp movepick.cpp position.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ - nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp + nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp memory.cpp HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \ nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \ @@ -63,7 +63,7 @@ HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \ nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \ nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \ search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \ - tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h + tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h memory.h OBJS = $(notdir $(SRCS:.cpp=.o)) @@ -489,8 +489,8 @@ ifeq ($(COMP),clang) endif ifeq ($(KERNEL),Darwin) - CXXFLAGS += -mmacosx-version-min=10.14 - LDFLAGS += -mmacosx-version-min=10.14 + CXXFLAGS += 
-mmacosx-version-min=10.15 + LDFLAGS += -mmacosx-version-min=10.15 ifneq ($(arch),any) CXXFLAGS += -arch $(arch) LDFLAGS += -arch $(arch) diff --git a/src/memory.cpp b/src/memory.cpp new file mode 100644 index 00000000..565b39b2 --- /dev/null +++ b/src/memory.cpp @@ -0,0 +1,229 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "memory.h" + +#include + +#if __has_include("features.h") + #include +#endif + +#if defined(__linux__) && !defined(__ANDROID__) + #include +#endif + +#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \ + || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \ + || defined(__e2k__) + #define POSIXALIGNEDALLOC + #include +#endif + +#ifdef _WIN32 + #if _WIN32_WINNT < 0x0601 + #undef _WIN32_WINNT + #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes + #endif + + #ifndef NOMINMAX + #define NOMINMAX + #endif + + #include // std::hex, std::dec + #include // std::cerr + #include // std::endl + #include +// The needed Windows API for processor groups could be missed from old Windows +// versions, so instead of calling them directly (forcing the linker to resolve +// the calls at compile time), try to load them at runtime. 
To do this we need +// first to define the corresponding function pointers. +extern "C" { +using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE); +using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID); +using AdjustTokenPrivileges_t = + bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); +} +#endif + + +namespace Stockfish { + +// Wrapper for systems where the c++17 implementation +// does not guarantee the availability of aligned_alloc(). Memory allocated with +// std_aligned_alloc() must be freed with std_aligned_free(). +void* std_aligned_alloc(size_t alignment, size_t size) { + // Apple requires 10.15, which is enforced in the makefile +#if defined(_ISOC11_SOURCE) || defined(__APPLE__) + return aligned_alloc(alignment, size); +#elif defined(POSIXALIGNEDALLOC) + void* mem; + return posix_memalign(&mem, alignment, size) ? nullptr : mem; +#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) + return _mm_malloc(size, alignment); +#elif defined(_WIN32) + return _aligned_malloc(size, alignment); +#else + return std::aligned_alloc(alignment, size); +#endif +} + +void std_aligned_free(void* ptr) { + +#if defined(POSIXALIGNEDALLOC) + free(ptr); +#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) + _mm_free(ptr); +#elif defined(_WIN32) + _aligned_free(ptr); +#else + free(ptr); +#endif +} + +// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages. 
+ +#if defined(_WIN32) + +static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) { + + #if !defined(_WIN64) + return nullptr; + #else + + HANDLE hProcessToken{}; + LUID luid{}; + void* mem = nullptr; + + const size_t largePageSize = GetLargePageMinimum(); + if (!largePageSize) + return nullptr; + + // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges + + HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll")); + + if (!hAdvapi32) + hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); + + auto OpenProcessToken_f = + OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken")); + if (!OpenProcessToken_f) + return nullptr; + auto LookupPrivilegeValueA_f = + LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA")); + if (!LookupPrivilegeValueA_f) + return nullptr; + auto AdjustTokenPrivileges_f = + AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges")); + if (!AdjustTokenPrivileges_f) + return nullptr; + + // We need SeLockMemoryPrivilege, so try to enable it for the process + if (!OpenProcessToken_f( // OpenProcessToken() + GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) + return nullptr; + + if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid)) + { + TOKEN_PRIVILEGES tp{}; + TOKEN_PRIVILEGES prevTp{}; + DWORD prevTpLen = 0; + + tp.PrivilegeCount = 1; + tp.Privileges[0].Luid = luid; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + + // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, + // we still need to query GetLastError() to ensure that the privileges were actually obtained. 
+ if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, + &prevTpLen) + && GetLastError() == ERROR_SUCCESS) + { + // Round up size to full pages and allocate + allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); + mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, + PAGE_READWRITE); + + // Privilege no longer needed, restore previous state + AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); + } + } + + CloseHandle(hProcessToken); + + return mem; + + #endif +} + +void* aligned_large_pages_alloc(size_t allocSize) { + + // Try to allocate large pages + void* mem = aligned_large_pages_alloc_windows(allocSize); + + // Fall back to regular, page-aligned, allocation if necessary + if (!mem) + mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + + return mem; +} + +#else + +void* aligned_large_pages_alloc(size_t allocSize) { + + #if defined(__linux__) + constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size + #else + constexpr size_t alignment = 4096; // assumed small page size + #endif + + // Round up to multiples of alignment + size_t size = ((allocSize + alignment - 1) / alignment) * alignment; + void* mem = std_aligned_alloc(alignment, size); + #if defined(MADV_HUGEPAGE) + madvise(mem, size, MADV_HUGEPAGE); + #endif + return mem; +} + +#endif + + +// aligned_large_pages_free() will free the previously allocated ttmem + +#if defined(_WIN32) + +void aligned_large_pages_free(void* mem) { + + if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) + { + DWORD err = GetLastError(); + std::cerr << "Failed to free large page memory. 
Error code: 0x" << std::hex << err + << std::dec << std::endl; + exit(EXIT_FAILURE); + } +} + +#else + +void aligned_large_pages_free(void* mem) { std_aligned_free(mem); } + +#endif +} // namespace Stockfish diff --git a/src/memory.h b/src/memory.h new file mode 100644 index 00000000..ad7ca602 --- /dev/null +++ b/src/memory.h @@ -0,0 +1,215 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef MEMORY_H_INCLUDED +#define MEMORY_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" + +namespace Stockfish { + +void* std_aligned_alloc(size_t alignment, size_t size); +void std_aligned_free(void* ptr); +// memory aligned by page size, min alignment: 4096 bytes +void* aligned_large_pages_alloc(size_t size); +// nop if mem == nullptr +void aligned_large_pages_free(void* mem); + +// frees memory which was placed there with placement new. +// works for both single objects and arrays of unknown bound +template +void memory_deleter(T* ptr, FREE_FUNC free_func) { + if (!ptr) + return; + + // Explicitly needed to call the destructor + if constexpr (!std::is_trivially_destructible_v) + ptr->~T(); + + free_func(ptr); + return; +} + +// frees memory which was placed there with placement new. 
+// works for both single objects and arrays of unknown bound +template +void memory_deleter_array(T* ptr, FREE_FUNC free_func) { + if (!ptr) + return; + + + // Move back on the pointer to where the size is allocated. + const size_t array_offset = std::max(sizeof(size_t), alignof(T)); + char* raw_memory = reinterpret_cast(ptr) - array_offset; + + if constexpr (!std::is_trivially_destructible_v) + { + const size_t size = *reinterpret_cast(raw_memory); + + // Explicitly call the destructor for each element in reverse order + for (size_t i = size; i-- > 0;) + ptr[i].~T(); + } + + free_func(raw_memory); +} + +// Allocates memory for a single object and places it there with placement new. +template +inline std::enable_if_t, T*> memory_allocator(ALLOC_FUNC alloc_func, + Args&&... args) { + void* raw_memory = alloc_func(sizeof(T)); + ASSERT_ALIGNED(raw_memory, alignof(T)); + return new (raw_memory) T(std::forward(args)...); +} + +// Allocates memory for an array of unknown bound and places it there with placement new. 
+template +inline std::enable_if_t, std::remove_extent_t*> +memory_allocator(ALLOC_FUNC alloc_func, size_t num) { + using ElementType = std::remove_extent_t; + + const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType)); + + // save the array size in the memory location + char* raw_memory = + reinterpret_cast(alloc_func(array_offset + num * sizeof(ElementType))); + ASSERT_ALIGNED(raw_memory, alignof(T)); + + new (raw_memory) size_t(num); + + for (size_t i = 0; i < num; ++i) + new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType(); + + // Need to return the pointer at the start of the array so that the indexing in unique_ptr works + return reinterpret_cast(raw_memory + array_offset); +} + +// +// +// aligned large page unique ptr +// +// + +template +struct LargePageDeleter { + void operator()(T* ptr) const { return memory_deleter(ptr, aligned_large_pages_free); } +}; + +template +struct LargePageArrayDeleter { + void operator()(T* ptr) const { return memory_deleter_array(ptr, aligned_large_pages_free); } +}; + +template +using LargePagePtr = + std::conditional_t, + std::unique_ptr>>, + std::unique_ptr>>; + +// make_unique_large_page for single objects +template +std::enable_if_t, LargePagePtr> make_unique_large_page(Args&&... 
args) { + static_assert(alignof(T) <= 4096, + "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + + T* obj = memory_allocator(aligned_large_pages_alloc, std::forward(args)...); + + return LargePagePtr(obj); +} + +// make_unique_large_page for arrays of unknown bound +template +std::enable_if_t, LargePagePtr> make_unique_large_page(size_t num) { + using ElementType = std::remove_extent_t; + + static_assert(alignof(ElementType) <= 4096, + "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + + ElementType* memory = memory_allocator(aligned_large_pages_alloc, num); + + return LargePagePtr(memory); +} + +// +// +// aligned unique ptr +// +// + +template +struct AlignedDeleter { + void operator()(T* ptr) const { return memory_deleter(ptr, std_aligned_free); } +}; + +template +struct AlignedArrayDeleter { + void operator()(T* ptr) const { return memory_deleter_array(ptr, std_aligned_free); } +}; + +template +using AlignedPtr = + std::conditional_t, + std::unique_ptr>>, + std::unique_ptr>>; + +// make_unique_aligned for single objects +template +std::enable_if_t, AlignedPtr> make_unique_aligned(Args&&... args) { + const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); }; + T* obj = memory_allocator(func, std::forward(args)...); + + return AlignedPtr(obj); +} + +// make_unique_aligned for arrays of unknown bound +template +std::enable_if_t, AlignedPtr> make_unique_aligned(size_t num) { + using ElementType = std::remove_extent_t; + + const auto func = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); }; + ElementType* memory = memory_allocator(func, num); + + return AlignedPtr(memory); +} + + +// Get the first aligned element of an array. +// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes, +// where N is the number of elements in the array. 
+template +T* align_ptr_up(T* ptr) { + static_assert(alignof(T) < Alignment); + + const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr)); + return reinterpret_cast( + reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); +} + + +} // namespace Stockfish + +#endif // #ifndef MEMORY_H_INCLUDED diff --git a/src/misc.cpp b/src/misc.cpp index aa22e61f..a8bb46ec 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -18,29 +18,6 @@ #include "misc.h" -#ifdef _WIN32 - #if _WIN32_WINNT < 0x0601 - #undef _WIN32_WINNT - #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes - #endif - - #ifndef NOMINMAX - #define NOMINMAX - #endif - - #include -// The needed Windows API for processor groups could be missed from old Windows -// versions, so instead of calling them directly (forcing the linker to resolve -// the calls at compile time), try to load them at runtime. To do this we need -// first to define the corresponding function pointers. -extern "C" { -using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE); -using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID); -using AdjustTokenPrivileges_t = - bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); -} -#endif - #include #include #include @@ -48,25 +25,14 @@ using AdjustTokenPrivileges_t = #include #include #include -#include #include +#include #include #include #include #include "types.h" -#if defined(__linux__) && !defined(__ANDROID__) - #include -#endif - -#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \ - || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \ - || defined(__e2k__) - #define POSIXALIGNEDALLOC - #include -#endif - namespace Stockfish { namespace { @@ -427,169 +393,6 @@ void prefetch(const void* addr) { #endif - -// Wrapper for systems where the c++17 implementation -// does not guarantee the availability of aligned_alloc(). 
Memory allocated with -// std_aligned_alloc() must be freed with std_aligned_free(). -void* std_aligned_alloc(size_t alignment, size_t size) { - -#if defined(POSIXALIGNEDALLOC) - void* mem; - return posix_memalign(&mem, alignment, size) ? nullptr : mem; -#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) - return _mm_malloc(size, alignment); -#elif defined(_WIN32) - return _aligned_malloc(size, alignment); -#else - return std::aligned_alloc(alignment, size); -#endif -} - -void std_aligned_free(void* ptr) { - -#if defined(POSIXALIGNEDALLOC) - free(ptr); -#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) - _mm_free(ptr); -#elif defined(_WIN32) - _aligned_free(ptr); -#else - free(ptr); -#endif -} - -// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages. - -#if defined(_WIN32) - -static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) { - - #if !defined(_WIN64) - return nullptr; - #else - - HANDLE hProcessToken{}; - LUID luid{}; - void* mem = nullptr; - - const size_t largePageSize = GetLargePageMinimum(); - if (!largePageSize) - return nullptr; - - // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges - - HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll")); - - if (!hAdvapi32) - hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); - - auto OpenProcessToken_f = - OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken")); - if (!OpenProcessToken_f) - return nullptr; - auto LookupPrivilegeValueA_f = - LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA")); - if (!LookupPrivilegeValueA_f) - return nullptr; - auto AdjustTokenPrivileges_f = - AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges")); - if (!AdjustTokenPrivileges_f) - return nullptr; - - // We need SeLockMemoryPrivilege, so try to enable it for the process - if (!OpenProcessToken_f( // 
OpenProcessToken() - GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) - return nullptr; - - if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid)) - { - TOKEN_PRIVILEGES tp{}; - TOKEN_PRIVILEGES prevTp{}; - DWORD prevTpLen = 0; - - tp.PrivilegeCount = 1; - tp.Privileges[0].Luid = luid; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - - // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, - // we still need to query GetLastError() to ensure that the privileges were actually obtained. - if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, - &prevTpLen) - && GetLastError() == ERROR_SUCCESS) - { - // Round up size to full pages and allocate - allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); - mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, - PAGE_READWRITE); - - // Privilege no longer needed, restore previous state - AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); - } - } - - CloseHandle(hProcessToken); - - return mem; - - #endif -} - -void* aligned_large_pages_alloc(size_t allocSize) { - - // Try to allocate large pages - void* mem = aligned_large_pages_alloc_windows(allocSize); - - // Fall back to regular, page-aligned, allocation if necessary - if (!mem) - mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); - - return mem; -} - -#else - -void* aligned_large_pages_alloc(size_t allocSize) { - - #if defined(__linux__) - constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size - #else - constexpr size_t alignment = 4096; // assumed small page size - #endif - - // Round up to multiples of alignment - size_t size = ((allocSize + alignment - 1) / alignment) * alignment; - void* mem = std_aligned_alloc(alignment, size); - #if defined(MADV_HUGEPAGE) - madvise(mem, size, MADV_HUGEPAGE); - #endif - return mem; -} - -#endif 
- - -// aligned_large_pages_free() will free the previously allocated ttmem - -#if defined(_WIN32) - -void aligned_large_pages_free(void* mem) { - - if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) - { - DWORD err = GetLastError(); - std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err - << std::dec << std::endl; - exit(EXIT_FAILURE); - } -} - -#else - -void aligned_large_pages_free(void* mem) { std_aligned_free(mem); } - -#endif - - #ifdef _WIN32 #include #define GETCWD _getcwd diff --git a/src/misc.h b/src/misc.h index 5c0bde44..557a4d8c 100644 --- a/src/misc.h +++ b/src/misc.h @@ -26,10 +26,9 @@ #include #include #include -#include +#include #include #include -#include #define stringify2(x) #x #define stringify(x) stringify2(x) @@ -44,39 +43,10 @@ std::string compiler_info(); // which can be quite slow. void prefetch(const void* addr); -void start_logger(const std::string& fname); -void* std_aligned_alloc(size_t alignment, size_t size); -void std_aligned_free(void* ptr); -// memory aligned by page size, min alignment: 4096 bytes -void* aligned_large_pages_alloc(size_t size); -// nop if mem == nullptr -void aligned_large_pages_free(void* mem); +void start_logger(const std::string& fname); size_t str_to_size_t(const std::string& s); -// Deleter for automating release of memory area -template -struct AlignedDeleter { - void operator()(T* ptr) const { - ptr->~T(); - std_aligned_free(ptr); - } -}; - -template -struct LargePageDeleter { - void operator()(T* ptr) const { - ptr->~T(); - aligned_large_pages_free(ptr); - } -}; - -template -using AlignedPtr = std::unique_ptr>; - -template -using LargePagePtr = std::unique_ptr>; - #if defined(__linux__) struct PipeDeleter { @@ -141,20 +111,6 @@ std::ostream& operator<<(std::ostream&, SyncCout); #define sync_cout std::cout << IO_LOCK #define sync_endl std::endl << IO_UNLOCK - -// Get the first aligned element of an array. 
-// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes, -// where N is the number of elements in the array. -template -T* align_ptr_up(T* ptr) { - static_assert(alignof(T) < Alignment); - - const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr)); - return reinterpret_cast( - reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); -} - - // True if and only if the binary is compiled on a little-endian machine static inline const union { uint32_t i; diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp index db864fcd..71c384ff 100644 --- a/src/nnue/network.cpp +++ b/src/nnue/network.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include #include @@ -30,6 +29,7 @@ #include "../evaluate.h" #include "../incbin/incbin.h" +#include "../memory.h" #include "../misc.h" #include "../position.h" #include "../types.h" @@ -86,23 +86,6 @@ namespace Stockfish::Eval::NNUE { namespace Detail { -// Initialize the evaluation function parameters -template -void initialize(AlignedPtr& pointer) { - - pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); - std::memset(pointer.get(), 0, sizeof(T)); -} - -template -void initialize(LargePagePtr& pointer) { - - static_assert(alignof(T) <= 4096, - "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); - pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); - std::memset(pointer.get(), 0, sizeof(T)); -} - // Read evaluation function parameters template bool read_parameters(std::istream& stream, T& reference) { @@ -128,19 +111,17 @@ template Network::Network(const Network& other) : evalFile(other.evalFile), embeddedType(other.embeddedType) { + if (other.featureTransformer) - { - Detail::initialize(featureTransformer); - *featureTransformer = *other.featureTransformer; - } + featureTransformer = make_unique_large_page(*other.featureTransformer); + + network = make_unique_aligned(LayerStacks); + + if (!other.network) 
+ return; + for (std::size_t i = 0; i < LayerStacks; ++i) - { - if (other.network[i]) - { - Detail::initialize(network[i]); - *(network[i]) = *(other.network[i]); - } - } + network[i] = other.network[i]; } template @@ -150,18 +131,15 @@ Network::operator=(const Network& other) { embeddedType = other.embeddedType; if (other.featureTransformer) - { - Detail::initialize(featureTransformer); - *featureTransformer = *other.featureTransformer; - } + featureTransformer = make_unique_large_page(*other.featureTransformer); + + network = make_unique_aligned(LayerStacks); + + if (!other.network) + return *this; + for (std::size_t i = 0; i < LayerStacks; ++i) - { - if (other.network[i]) - { - Detail::initialize(network[i]); - *(network[i]) = *(other.network[i]); - } - } + network[i] = other.network[i]; return *this; } @@ -253,7 +231,7 @@ Value Network::evaluate(const Position& const int bucket = (pos.count() - 1) / 4; const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket); - const auto positional = network[bucket]->propagate(transformedFeatures); + const auto positional = network[bucket].propagate(transformedFeatures); if (complexity) *complexity = std::abs(psqt - positional) / OutputScale; @@ -292,11 +270,11 @@ void Network::verify(std::string evalfilePath) const { exit(EXIT_FAILURE); } - size_t size = sizeof(*featureTransformer) + sizeof(*network) * LayerStacks; + size_t size = sizeof(*featureTransformer) + sizeof(Arch) * LayerStacks; sync_cout << "info string NNUE evaluation using " << evalfilePath << " (" << size / (1024 * 1024) << "MiB, (" << featureTransformer->InputDimensions << ", " - << network[0]->TransformedFeatureDimensions << ", " << network[0]->FC_0_OUTPUTS - << ", " << network[0]->FC_1_OUTPUTS << ", 1))" << sync_endl; + << network[0].TransformedFeatureDimensions << ", " << network[0].FC_0_OUTPUTS << ", " + << network[0].FC_1_OUTPUTS << ", 1))" << sync_endl; } @@ -333,7 +311,7 @@ Network::trace_evaluate(const Position& { const 
auto materialist = featureTransformer->transform(pos, cache, transformedFeatures, bucket); - const auto positional = network[bucket]->propagate(transformedFeatures); + const auto positional = network[bucket].propagate(transformedFeatures); t.psqt[bucket] = static_cast(materialist / OutputScale); t.positional[bucket] = static_cast(positional / OutputScale); @@ -386,9 +364,8 @@ void Network::load_internal() { template void Network::initialize() { - Detail::initialize(featureTransformer); - for (std::size_t i = 0; i < LayerStacks; ++i) - Detail::initialize(network[i]); + featureTransformer = make_unique_large_page(); + network = make_unique_aligned(LayerStacks); } @@ -455,7 +432,7 @@ bool Network::read_parameters(std::istream& stream, return false; for (std::size_t i = 0; i < LayerStacks; ++i) { - if (!Detail::read_parameters(stream, *(network[i]))) + if (!Detail::read_parameters(stream, network[i])) return false; } return stream && stream.peek() == std::ios::traits_type::eof(); @@ -471,7 +448,7 @@ bool Network::write_parameters(std::ostream& stream, return false; for (std::size_t i = 0; i < LayerStacks; ++i) { - if (!Detail::write_parameters(stream, *(network[i]))) + if (!Detail::write_parameters(stream, network[i])) return false; } return bool(stream); diff --git a/src/nnue/network.h b/src/nnue/network.h index f0ccfafc..6ba3cfba 100644 --- a/src/nnue/network.h +++ b/src/nnue/network.h @@ -25,13 +25,13 @@ #include #include -#include "../misc.h" +#include "../memory.h" #include "../position.h" #include "../types.h" +#include "nnue_accumulator.h" #include "nnue_architecture.h" #include "nnue_feature_transformer.h" #include "nnue_misc.h" -#include "nnue_accumulator.h" namespace Stockfish::Eval::NNUE { @@ -91,7 +91,7 @@ class Network { LargePagePtr featureTransformer; // Evaluation function - AlignedPtr network[LayerStacks]; + AlignedPtr network; EvalFile evalFile; EmbeddedNNUEType embeddedType; diff --git a/src/numa.h b/src/numa.h index 5934a0cd..a56d7142 100644 --- 
a/src/numa.h +++ b/src/numa.h @@ -32,6 +32,7 @@ #include #include #include +#include // We support linux very well, but we explicitly do NOT support Android, because there's // no affected systems, not worth maintaining. diff --git a/src/thread.h b/src/thread.h index 102b2299..7416271b 100644 --- a/src/thread.h +++ b/src/thread.h @@ -23,15 +23,15 @@ #include #include #include +#include #include #include #include -#include +#include "numa.h" #include "position.h" #include "search.h" #include "thread_win32_osx.h" -#include "numa.h" namespace Stockfish { diff --git a/src/tt.cpp b/src/tt.cpp index f95170e9..f808106a 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -24,7 +24,7 @@ #include #include -#include "misc.h" +#include "memory.h" #include "syzygy/tbprobe.h" #include "thread.h" @@ -75,11 +75,10 @@ uint8_t TTEntry::relative_age(const uint8_t generation8) const { // measured in megabytes. Transposition table consists // of clusters and each cluster consists of ClusterSize number of TTEntry. void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) { - aligned_large_pages_free(table); - clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); + table = make_unique_large_page(clusterCount); + if (!table) { std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." 
<< std::endl; diff --git a/src/tt.h b/src/tt.h index 3b09ec4e..2dcfdd44 100644 --- a/src/tt.h +++ b/src/tt.h @@ -21,7 +21,9 @@ #include #include +#include +#include "memory.h" #include "misc.h" #include "types.h" @@ -94,8 +96,6 @@ class TranspositionTable { static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; public: - ~TranspositionTable() { aligned_large_pages_free(table); } - void new_search() { // increment by delta to keep lower bits as is generation8 += GENERATION_DELTA; @@ -115,9 +115,9 @@ class TranspositionTable { private: friend struct TTEntry; - size_t clusterCount; - Cluster* table = nullptr; - uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8 + size_t clusterCount; + LargePagePtr table; + uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8 }; } // namespace Stockfish From 3d6756769cd159edf1d7eaec074c880551590c32 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sun, 2 Jun 2024 18:40:32 +0300 Subject: [PATCH 098/315] Simplify continuation histories Functional simplification. Simplify continuation histories, therefore increasing the effect of stats updates and movepicker bonuses for continuation history 3 plies deep. 
Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 229184 W: 59087 L: 59080 D: 111017 Ptnml(0-2): 554, 27248, 59002, 27213, 575 https://tests.stockfishchess.org/tests/view/665c7a09fd45fb0f907c223b Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 44532 W: 11419 L: 11223 D: 21890 Ptnml(0-2): 18, 4787, 12457, 4989, 15 https://tests.stockfishchess.org/tests/view/665c8842fd45fb0f907c23ec closes https://github.com/official-stockfish/Stockfish/pull/5339 Bench: 1326444 --- src/movepick.cpp | 2 +- src/search.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index d3335907..52e8c526 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -182,7 +182,7 @@ void MovePicker::score() { m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to]; m.value += 2 * (*continuationHistory[0])[pc][to]; m.value += (*continuationHistory[1])[pc][to]; - m.value += (*continuationHistory[2])[pc][to] / 4; + m.value += (*continuationHistory[2])[pc][to] / 3; m.value += (*continuationHistory[3])[pc][to]; m.value += (*continuationHistory[5])[pc][to]; diff --git a/src/search.cpp b/src/search.cpp index a2a75af0..44da8683 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1811,7 +1811,7 @@ void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { if (ss->inCheck && i > 2) break; if (((ss - i)->currentMove).is_ok()) - (*(ss - i)->continuationHistory)[pc][to] << bonus / (1 + 3 * (i == 3)); + (*(ss - i)->continuationHistory)[pc][to] << bonus / (1 + (i == 3)); } } From 924a843594743297f47edf7b0931ede8dcbb6dd8 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sun, 2 Jun 2024 23:32:58 +0300 Subject: [PATCH 099/315] Simplify recapture extension Simplifying the extension formula by removing the move == ttMove condition. 
Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 47328 W: 12324 L: 12117 D: 22887 Ptnml(0-2): 134, 5532, 12097, 5795, 106 https://tests.stockfishchess.org/tests/view/665ca5e6fd45fb0f907c41be Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 72126 W: 18378 L: 18209 D: 35539 Ptnml(0-2): 36, 7841, 20130, 8030, 26 https://tests.stockfishchess.org/tests/view/665cb276fd45fb0f907c41f9 closes https://github.com/official-stockfish/Stockfish/pull/5341 Bench: 1399468 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 44da8683..4defbadb 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1103,7 +1103,7 @@ moves_loop: // When in check, search starts here } // Extension for capturing the previous moved piece (~0 Elo on STC, ~1 Elo on LTC) - else if (PvNode && move == ttMove && move.to_sq() == prevSq + else if (PvNode && move.to_sq() == prevSq && thisThread->captureHistory[movedPiece][move.to_sq()] [type_of(pos.piece_on(move.to_sq()))] > 3988) From fe298953f89a86e7edfb0e53605d9d9c47f7ceea Mon Sep 17 00:00:00 2001 From: Gahtan Nahdi <155860115+gahtan-syarif@users.noreply.github.com> Date: Sun, 2 Jun 2024 05:26:34 +0700 Subject: [PATCH 100/315] Simplify smallnet threshold Turns the quadratic threshold to a linear one STC non-reg: https://tests.stockfishchess.org/tests/view/665ba0b744e8416a9cdc188d LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 330432 W: 85351 L: 85454 D: 159627 Ptnml(0-2): 888, 39643, 84283, 39488, 914 LTC non-reg: https://tests.stockfishchess.org/tests/view/665cd60ffd45fb0f907c4306 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 139146 W: 35194 L: 35093 D: 68859 Ptnml(0-2): 58, 15523, 38313, 15618, 61 closes https://github.com/official-stockfish/Stockfish/pull/5342 Bench: 1057383 --- src/evaluate.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index eaf7ab5f..064ea027 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -46,8 
+46,7 @@ int Eval::simple_eval(const Position& pos, Color c) { bool Eval::use_smallnet(const Position& pos) { int simpleEval = simple_eval(pos, pos.side_to_move()); - int pawnCount = pos.count(); - return std::abs(simpleEval) > 992 + 6 * pawnCount * pawnCount / 16; + return std::abs(simpleEval) > 992 + 10 * pos.count(); } // Evaluate is the evaluator for the outer world. It returns a static evaluation From 397f47a7a1b7abe490d7bcb7a526d01555aed2be Mon Sep 17 00:00:00 2001 From: Dubslow Date: Sun, 2 Jun 2024 16:27:58 -0500 Subject: [PATCH 101/315] Adjust lowest depth constants to the natural place Passed STC: https://tests.stockfishchess.org/tests/view/665ce3f8fd45fb0f907c537f LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 282784 W: 73032 L: 73082 D: 136670 Ptnml(0-2): 680, 31845, 76364, 31851, 652 Recently when I overhauled these comments, Disservin asked why these were so much lower: they're a relic from when we had a third QS stage at -5. Now we don't, so fix these to the obvious place. I was fairly sure it was nonfunctional but ran the nonreg to be double sure. closes https://github.com/official-stockfish/Stockfish/pull/5343 Bench: 1057383 --- src/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/types.h b/src/types.h index aa4af012..10ad1fac 100644 --- a/src/types.h +++ b/src/types.h @@ -196,8 +196,8 @@ enum : int { // For TT entries where no searching at all was done (whether regular or qsearch) we use // _UNSEARCHED, which should thus compare lower than any QS or regular depth. _ENTRY_OFFSET is used // only for the TT entry occupancy check (see tt.cpp), and should thus be lower than _UNSEARCHED. 
- DEPTH_UNSEARCHED = -6, - DEPTH_ENTRY_OFFSET = -7 + DEPTH_UNSEARCHED = -2, + DEPTH_ENTRY_OFFSET = -3 }; // clang-format off From 86b564055d753c49dede0b8549363f3ee11c572e Mon Sep 17 00:00:00 2001 From: Dubslow Date: Sun, 2 Jun 2024 16:55:10 -0500 Subject: [PATCH 102/315] Remove delta, adjusted, complexity from nnue code ...rather they're the consumer's concern whether to tweak the result or not. Passed STC: https://tests.stockfishchess.org/tests/view/665cea9ffd45fb0f907c53bd LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 69696 W: 18101 L: 17918 D: 33677 Ptnml(0-2): 195, 8171, 17929, 8362, 191 Passed LTC: https://tests.stockfishchess.org/tests/view/665cf761fd45fb0f907c5406 LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 63720 W: 16344 L: 16165 D: 31211 Ptnml(0-2): 32, 6990, 17625, 7193, 20 Non functional except for rounding issues of OutputScale changing bench. closes https://github.com/official-stockfish/Stockfish/pull/5344 Bench: 1378596 --- src/evaluate.cpp | 23 +++++++++++++++-------- src/nnue/network.cpp | 20 ++++---------------- src/nnue/network.h | 8 ++++---- src/nnue/nnue_misc.cpp | 13 ++++++++----- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 064ea027..248b2593 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -24,8 +24,9 @@ #include #include #include -#include #include +#include +#include #include "nnue/network.h" #include "nnue/nnue_misc.h" @@ -60,17 +61,22 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, int simpleEval = simple_eval(pos, pos.side_to_move()); bool smallNet = use_smallnet(pos); - int nnueComplexity; int v; - Value nnue = smallNet ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity) - : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); + auto [psqt, positional] = smallNet ? 
networks.small.evaluate(pos, &caches.small) + : networks.big.evaluate(pos, &caches.big); + + constexpr int delta = 3; + Value nnue = ((128 - delta) * psqt + (128 + delta) * positional) / 128; + int nnueComplexity = std::abs(psqt - positional); // Re-evaluate the position when higher eval accuracy is worth the time spent if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 250)) { - nnue = networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); - smallNet = false; + std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big); + nnue = ((128 - delta) * psqt + (128 + delta) * positional) / 128; + nnueComplexity = std::abs(psqt - positional); + smallNet = false; } // Blend optimism and eval with nnue complexity @@ -108,8 +114,9 @@ std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) { ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); - Value v = networks.big.evaluate(pos, &caches->big, false); - v = pos.side_to_move() == WHITE ? v : -v; + auto [psqt, positional] = networks.big.evaluate(pos, &caches->big); + Value v = psqt + positional; + v = pos.side_to_move() == WHITE ? v : -v; ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n"; v = evaluate(networks, pos, *caches, VALUE_ZERO); diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp index 71c384ff..f7d2cc6a 100644 --- a/src/nnue/network.cpp +++ b/src/nnue/network.cpp @@ -18,7 +18,6 @@ #include "network.h" -#include #include #include #include @@ -206,15 +205,13 @@ bool Network::save(const std::optional& filename template -Value Network::evaluate(const Position& pos, - AccumulatorCaches::Cache* cache, - bool adjusted, - int* complexity) const { +NetworkOutput +Network::evaluate(const Position& pos, + AccumulatorCaches::Cache* cache) const { // We manually align the arrays on the stack because with gcc < 9.3 // overaligning stack variables with alignas() doesn't work correctly. 
constexpr uint64_t alignment = CacheLineSize; - constexpr int delta = 24; #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) TransformedFeatureType @@ -232,16 +229,7 @@ Value Network::evaluate(const Position& const int bucket = (pos.count() - 1) / 4; const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket); const auto positional = network[bucket].propagate(transformedFeatures); - - if (complexity) - *complexity = std::abs(psqt - positional) / OutputScale; - - // Give more value to positional evaluation when adjusted flag is set - if (adjusted) - return static_cast(((1024 - delta) * psqt + (1024 + delta) * positional) - / (1024 * OutputScale)); - else - return static_cast((psqt + positional) / OutputScale); + return {static_cast(psqt / OutputScale), static_cast(positional / OutputScale)}; } diff --git a/src/nnue/network.h b/src/nnue/network.h index 6ba3cfba..15208255 100644 --- a/src/nnue/network.h +++ b/src/nnue/network.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "../memory.h" @@ -40,6 +41,7 @@ enum class EmbeddedNNUEType { SMALL, }; +using NetworkOutput = std::tuple; template class Network { @@ -59,10 +61,8 @@ class Network { void load(const std::string& rootDirectory, std::string evalfilePath); bool save(const std::optional& filename) const; - Value evaluate(const Position& pos, - AccumulatorCaches::Cache* cache, - bool adjusted = false, - int* complexity = nullptr) const; + NetworkOutput evaluate(const Position& pos, + AccumulatorCaches::Cache* cache) const; void hint_common_access(const Position& pos, diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp index a13c717c..7585cce5 100644 --- a/src/nnue/nnue_misc.cpp +++ b/src/nnue/nnue_misc.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include "../evaluate.h" #include "../position.h" @@ -131,8 +132,9 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat // We estimate the value of each piece by doing a 
differential evaluation from // the current base eval, simulating the removal of the piece from its square. - Value base = networks.big.evaluate(pos, &caches.big); - base = pos.side_to_move() == WHITE ? base : -base; + auto [psqt, positional] = networks.big.evaluate(pos, &caches.big); + Value base = psqt + positional; + base = pos.side_to_move() == WHITE ? base : -base; for (File f = FILE_A; f <= FILE_H; ++f) for (Rank r = RANK_1; r <= RANK_8; ++r) @@ -148,9 +150,10 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat pos.remove_piece(sq); st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false; - Value eval = networks.big.evaluate(pos, &caches.big); - eval = pos.side_to_move() == WHITE ? eval : -eval; - v = base - eval; + std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big); + Value eval = psqt + positional; + eval = pos.side_to_move() == WHITE ? eval : -eval; + v = base - eval; pos.put_piece(pc, sq); st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false; From ba06671aa9df5c0a3fa5f1fa2ce17ea4aa742b7a Mon Sep 17 00:00:00 2001 From: Disservin Date: Mon, 3 Jun 2024 19:47:34 +0200 Subject: [PATCH 103/315] Normalize some variable names and reuse existing logic closes https://github.com/official-stockfish/Stockfish/pull/5346 No functional change --- src/search.cpp | 4 ++-- src/search.h | 6 +++--- src/thread.cpp | 4 +--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 4defbadb..c03fe781 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -137,10 +137,10 @@ void update_all_stats(const Position& pos, Search::Worker::Worker(SharedState& sharedState, std::unique_ptr sm, - size_t thread_id, + size_t threadId, NumaReplicatedAccessToken token) : // Unpack the SharedState struct into member variables - thread_idx(thread_id), + threadIdx(threadId), numaAccessToken(token), manager(std::move(sm)), options(sharedState.options), diff 
--git a/src/search.h b/src/search.h index 01f7b8bd..a22d3200 100644 --- a/src/search.h +++ b/src/search.h @@ -244,7 +244,7 @@ class Worker { // It searches from the root position and outputs the "bestmove". void start_searching(); - bool is_mainthread() const { return thread_idx == 0; } + bool is_mainthread() const { return threadIdx == 0; } // Public because they need to be updatable by the stats CounterMoveHistory counterMoves; @@ -270,7 +270,7 @@ class Worker { // Get a pointer to the search manager, only allowed to be called by the // main thread. SearchManager* main_manager() const { - assert(thread_idx == 0); + assert(threadIdx == 0); return static_cast(manager.get()); } @@ -291,7 +291,7 @@ class Worker { Depth rootDepth, completedDepth; Value rootDelta; - size_t thread_idx; + size_t threadIdx; NumaReplicatedAccessToken numaAccessToken; // Reductions lookup table initialized at startup diff --git a/src/thread.cpp b/src/thread.cpp index a36c2efb..0a33422a 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -127,9 +127,7 @@ void Thread::idle_loop() { } } -Search::SearchManager* ThreadPool::main_manager() { - return static_cast(main_thread()->worker.get()->manager.get()); -} +Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); } uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); } uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); } From 7f09d06b834a5aaedbc78c5161ba91a8d6761421 Mon Sep 17 00:00:00 2001 From: Disservin Date: Tue, 4 Jun 2024 07:53:25 +0200 Subject: [PATCH 104/315] Properly initialize the TT in a multithreaded way again --- src/tt.cpp | 4 +++- src/tt.h | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/tt.cpp b/src/tt.cpp index f808106a..56779b86 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -75,9 +75,11 @@ uint8_t TTEntry::relative_age(const uint8_t generation8) const { // measured in megabytes. 
Transposition table consists // of clusters and each cluster consists of ClusterSize number of TTEntry. void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) { + aligned_large_pages_free(table); + clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - table = make_unique_large_page(clusterCount); + table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); if (!table) { diff --git a/src/tt.h b/src/tt.h index 2dcfdd44..974c7eb0 100644 --- a/src/tt.h +++ b/src/tt.h @@ -96,6 +96,7 @@ class TranspositionTable { static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; public: + ~TranspositionTable() { aligned_large_pages_free(table); } void new_search() { // increment by delta to keep lower bits as is generation8 += GENERATION_DELTA; @@ -115,9 +116,9 @@ class TranspositionTable { private: friend struct TTEntry; - size_t clusterCount; - LargePagePtr table; - uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8 + size_t clusterCount; + Cluster* table = nullptr; + uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8 }; } // namespace Stockfish From 4f53560d248195b172ac97d7c74e6bcfc65fe6fd Mon Sep 17 00:00:00 2001 From: Disservin Date: Tue, 4 Jun 2024 07:57:08 +0200 Subject: [PATCH 105/315] Accumulate nodes over all bench positions not just the last closes https://github.com/official-stockfish/Stockfish/pull/5352 No functional change --- src/tt.h | 1 - src/uci.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tt.h b/src/tt.h index 974c7eb0..b2e8f582 100644 --- a/src/tt.h +++ b/src/tt.h @@ -21,7 +21,6 @@ #include #include -#include #include "memory.h" #include "misc.h" diff --git a/src/uci.cpp b/src/uci.cpp index 4b683116..43b0e005 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -296,7 +296,7 @@ void UCIEngine::bench(std::istream& args) { Search::LimitsType limits = parse_limits(is); if (limits.perft) - nodes = perft(limits); + nodesSearched = 
perft(limits); else { engine.go(limits); From daaccd9fc9ca2dcc8ed7c72075fb1d3f504fa6ef Mon Sep 17 00:00:00 2001 From: Gahtan Nahdi <155860115+gahtan-syarif@users.noreply.github.com> Date: Tue, 4 Jun 2024 05:31:51 +0700 Subject: [PATCH 106/315] Simplify smallnet threshold remove pawncount Passed STC non-reg: https://tests.stockfishchess.org/tests/view/665e4548fd45fb0f907c80d5 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 60896 W: 15710 L: 15518 D: 29668 Ptnml(0-2): 149, 7145, 15660, 7353, 141 Passed LTC non-reg: https://tests.stockfishchess.org/tests/view/665e4c52fd45fb0f907c815f LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 58068 W: 14773 L: 14590 D: 28705 Ptnml(0-2): 16, 6368, 16090, 6537, 23 closes https://github.com/official-stockfish/Stockfish/pull/5349 Bench: 1343156 --- src/evaluate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 248b2593..afba6363 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -47,7 +47,7 @@ int Eval::simple_eval(const Position& pos, Color c) { bool Eval::use_smallnet(const Position& pos) { int simpleEval = simple_eval(pos, pos.side_to_move()); - return std::abs(simpleEval) > 992 + 10 * pos.count(); + return std::abs(simpleEval) > 992; } // Evaluate is the evaluator for the outer world. It returns a static evaluation From 02ff76630b358e5f958793cc93df0009d2da65a5 Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Tue, 4 Jun 2024 12:48:13 +0200 Subject: [PATCH 107/315] Add NumaPolicy "hardware" option that bypasses current processor affinity. Can be used in case a GUI (e.g. ChessBase 17 see #5307) sets affinity to a single processor group, but the user would like to use the full capabilities of the hardware. Improves affinity handling on Windows in case of multiple available APIs and existing affinities. 
closes https://github.com/official-stockfish/Stockfish/pull/5353 No functional change --- src/engine.cpp | 5 + src/numa.h | 394 ++++++++++++++++++++++++++++--------------------- 2 files changed, 232 insertions(+), 167 deletions(-) diff --git a/src/engine.cpp b/src/engine.cpp index 3fc27223..6980dd83 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -133,6 +133,11 @@ void Engine::set_numa_config_from_option(const std::string& o) { { numaContext.set_numa_config(NumaConfig::from_system()); } + else if (o == "hardware") + { + // Don't respect affinity set in the system. + numaContext.set_numa_config(NumaConfig::from_system(false)); + } else if (o == "none") { numaContext.set_numa_config(NumaConfig{}); diff --git a/src/numa.h b/src/numa.h index a56d7142..c170c178 100644 --- a/src/numa.h +++ b/src/numa.h @@ -19,6 +19,7 @@ #ifndef NUMA_H_INCLUDED #define NUMA_H_INCLUDED +#include #include #include #include @@ -63,21 +64,9 @@ static constexpr size_t WIN_PROCESSOR_GROUP_SIZE = 64; // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadselectedcpusetmasks using SetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT); -// https://learn.microsoft.com/en-us/windows/win32/api/processtopologyapi/nf-processtopologyapi-setthreadgroupaffinity -using SetThreadGroupAffinity_t = BOOL (*)(HANDLE, const GROUP_AFFINITY*, PGROUP_AFFINITY); - // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getthreadselectedcpusetmasks using GetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT, PUSHORT); -// https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-getprocessaffinitymask -using GetProcessAffinityMask_t = BOOL (*)(HANDLE, PDWORD_PTR, PDWORD_PTR); - -// https://learn.microsoft.com/en-us/windows/win32/api/processtopologyapi/nf-processtopologyapi-getprocessgroupaffinity -using GetProcessGroupAffinity_t = BOOL (*)(HANDLE, PUSHORT, PUSHORT); - -// 
https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-getactiveprocessorcount -using GetActiveProcessorCount_t = DWORD (*)(WORD); - #endif #include "misc.h" @@ -94,14 +83,7 @@ inline CpuIndex get_hardware_concurrency() { // only returns the number of processors in the first group, because only these // are available to std::thread. #ifdef _WIN64 - HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto GetActiveProcessorCount_f = - GetActiveProcessorCount_t((void (*)()) GetProcAddress(k32, "GetActiveProcessorCount")); - - if (GetActiveProcessorCount_f != nullptr) - { - concurrency = GetActiveProcessorCount_f(ALL_PROCESSOR_GROUPS); - } + concurrency = std::max(concurrency, GetActiveProcessorCount(ALL_PROCESSOR_GROUPS)); #endif return concurrency; @@ -109,6 +91,214 @@ inline CpuIndex get_hardware_concurrency() { inline const CpuIndex SYSTEM_THREADS_NB = std::max(1, get_hardware_concurrency()); +#if defined(_WIN64) + +struct WindowsAffinity { + std::optional> oldApi; + std::optional> newApi; + bool isDeterminate = true; + + std::optional> get_combined() const { + // When the affinity is not determinate we treat it as no affinity, + // because otherwise we would have to set affinity to fewer + // processors than we currently have affinity to. + if (!isDeterminate) + return std::nullopt; + + if (!oldApi.has_value()) + return newApi; + if (!newApi.has_value()) + return oldApi; + + std::set intersect; + std::set_intersection(oldApi->begin(), oldApi->end(), newApi->begin(), newApi->end(), + std::inserter(intersect, intersect.begin())); + return intersect; + } +}; + +inline std::pair> get_process_group_affinity() { + WORD numProcGroups = GetActiveProcessorGroupCount(); + + // GetProcessGroupAffinity requires the GroupArray argument to be + // aligned to 4 bytes instead of just 2. 
+ static constexpr size_t GroupArrayMinimumAlignment = 4; + static_assert(GroupArrayMinimumAlignment >= alignof(USHORT)); + + auto GroupArray = std::make_unique( + numProcGroups + (GroupArrayMinimumAlignment / alignof(USHORT) - 1)); + + USHORT GroupCount = static_cast(numProcGroups); + const BOOL status = GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, GroupArray.get()); + + return std::make_pair(status, std::vector(GroupArray.get(), GroupArray.get() + GroupCount)); +} + +// Since Windows 11 and Windows Server 2022 thread affinities can span +// processor groups and can be set as such by a new WinAPI function. +// However, we may need to force using the old API if we detect +// that the process has affinity set by the old API already and we want to override that. +inline bool use_old_affinity_api() { + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t( + (void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks")); + + if (SetThreadSelectedCpuSetMasks_f == nullptr) + return true; + + auto [status, groupAffinity] = get_process_group_affinity(); + + // If GroupCount > 1 then we know old API was never used and we can stick + // to the new API safely. + if (status != 0 && groupAffinity.size() > 1) + return false; + + return true; +}; + +// On Windows there are two ways to set affinity, and therefore 2 ways to get it. +// These are not consistent, so we have to check both. +// In some cases it is actually not possible to determine affinity. +// For example when two different threads have affinity on different processor groups, +// set using SetThreadAffinityMask, we can't retrieve the actual affinities. +// From documentation on GetProcessAffinityMask: +// > If the calling process contains threads in multiple groups, +// > the function returns zero for both affinity masks. +// In such cases we just give up and assume we have affinity for all processors. 
+// nullopt means no affinity is set, that is, all processors are allowed +inline WindowsAffinity get_process_affinity() { + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto GetThreadSelectedCpuSetMasks_f = GetThreadSelectedCpuSetMasks_t( + (void (*)()) GetProcAddress(k32, "GetThreadSelectedCpuSetMasks")); + + WindowsAffinity affinity; + + if (GetThreadSelectedCpuSetMasks_f != nullptr) + { + USHORT RequiredMaskCount; + BOOL status = + GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), nullptr, 0, &RequiredMaskCount); + + // If RequiredMaskCount then these affinities were never set, but it's not consistent + // so GetProcessAffinityMask may still return some affinity. + if (status == 0) + { + affinity.isDeterminate = false; + return affinity; + } + + if (RequiredMaskCount > 0) + { + std::set cpus; + + auto groupAffinities = std::make_unique(RequiredMaskCount); + + GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(), + RequiredMaskCount, &RequiredMaskCount); + + for (USHORT i = 0; i < RequiredMaskCount; ++i) + { + const size_t procGroupIndex = groupAffinities[i].Group; + + for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) + { + if (groupAffinities[i].Mask & (KAFFINITY(1) << j)) + cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); + } + } + + affinity.newApi = std::move(cpus); + } + } + + DWORD_PTR proc, sys; + BOOL status = GetProcessAffinityMask(GetCurrentProcess(), &proc, &sys); + + // If proc == 0 then we can't determine affinity because it spans processor groups. + if (status == 0 || proc == 0) + { + affinity.isDeterminate = false; + return affinity; + } + + // If SetProcessAffinityMask was never called the affinity + // must span all processor groups, but if it was called it must only span one. 
+ auto [status2, groupAffinity] = get_process_group_affinity(); + if (status2 == 0) + { + affinity.isDeterminate = false; + return affinity; + } + + // If we have affinity for more than 1 group then at this point we + // can assume SetProcessAffinityMask has never been called and therefore + // according ot old API we do not have any affinity set. + // Otherwise we have to assume we have affinity set and gather the processor IDs. + if (groupAffinity.size() == 1) + { + std::set cpus; + + const size_t procGroupIndex = groupAffinity[0]; + + uint64_t mask = static_cast(proc); + for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) + { + if (mask & (KAFFINITY(1) << j)) + cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); + } + + affinity.oldApi = std::move(cpus); + } + + return affinity; +} + +#endif + +#if defined(__linux__) && !defined(__ANDROID__) + +inline std::set get_process_affinity() { + + std::set cpus; + + // For unsupported systems, or in case of a soft error, we may assume all processors + // are available for use. + [[maybe_unused]] auto set_to_all_cpus = [&]() { + for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) + cpus.insert(c); + }; + + // cpu_set_t by default holds 1024 entries. This may not be enough soon, + // but there is no easy way to determine how many threads there actually is. + // In this case we just choose a reasonable upper bound. + static constexpr CpuIndex MaxNumCpus = 1024 * 64; + + cpu_set_t* mask = CPU_ALLOC(MaxNumCpus); + if (mask == nullptr) + std::exit(EXIT_FAILURE); + + const size_t masksize = CPU_ALLOC_SIZE(MaxNumCpus); + + CPU_ZERO_S(masksize, mask); + + const int status = sched_getaffinity(0, masksize, mask); + + if (status != 0) + { + CPU_FREE(mask); + std::exit(EXIT_FAILURE); + } + + for (CpuIndex c = 0; c < MaxNumCpus; ++c) + if (CPU_ISSET_S(c, masksize, mask)) + cpus.insert(c); + + CPU_FREE(mask); + + return cpus; +} + +#endif // We want to abstract the purpose of storing the numa node index somewhat. 
// Whoever is using this does not need to know the specifics of the replication @@ -224,7 +414,7 @@ class NumaConfig { std::optional> allowedCpus; if (respectProcessAffinity) - allowedCpus = get_process_affinity(); + allowedCpus = get_process_affinity().get_combined(); // The affinity can't be determined in all cases on Windows, but we at least guarantee // that the number of allowed processors is >= number of processors in the affinity mask. @@ -233,15 +423,6 @@ class NumaConfig { return !allowedCpus.has_value() || allowedCpus->count(c) == 1; }; - // Since Windows 11 and Windows Server 2022 thread affinities can span - // processor groups and can be set as such by a new WinAPI function. - static const bool CanAffinitySpanProcessorGroups = []() { - HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t( - (void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks")); - return SetThreadSelectedCpuSetMasks_f != nullptr; - }(); - WORD numProcGroups = GetActiveProcessorGroupCount(); for (WORD procGroup = 0; procGroup < numProcGroups; ++procGroup) { @@ -269,7 +450,8 @@ class NumaConfig { // the new NUMA allocation behaviour was introduced while there was // still no way to set thread affinity spanning multiple processor groups. // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support - if (!CanAffinitySpanProcessorGroups) + // We also do this is if need to force old API for some reason. + if (use_old_affinity_api()) { NumaConfig splitCfg = empty(); @@ -307,6 +489,12 @@ class NumaConfig { // We have to ensure no empty NUMA nodes persist. cfg.remove_empty_numa_nodes(); + // If the user explicitly opts out from respecting the current process affinity + // then it may be inconsistent with the current affinity (obviously), so we + // consider it custom. 
+ if (!respectProcessAffinity) + cfg.customAffinity = true; + return cfg; } @@ -510,9 +698,11 @@ class NumaConfig { HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t( (void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks")); - auto SetThreadGroupAffinity_f = - SetThreadGroupAffinity_t((void (*)()) GetProcAddress(k32, "SetThreadGroupAffinity")); + // We ALWAYS set affinity with the new API if available, + // because there's no downsides, and we forcibly keep it consistent + // with the old API should we need to use it. I.e. we always keep this as a superset + // of what we set with SetThreadGroupAffinity. if (SetThreadSelectedCpuSetMasks_f != nullptr) { // Only available on Windows 11 and Windows Server 2022 onwards. @@ -541,7 +731,9 @@ class NumaConfig { // This is defensive, allowed because this code is not performance critical. SwitchToThread(); } - else if (SetThreadGroupAffinity_f != nullptr) + + // Sometimes we need to force the old API, but do not use it unless necessary. + if (SetThreadSelectedCpuSetMasks_f == nullptr || use_old_affinity_api()) { // On earlier windows version (since windows 7) we can't run a single thread // on multiple processor groups, so we need to restrict the group. @@ -576,7 +768,7 @@ class NumaConfig { HANDLE hThread = GetCurrentThread(); - const BOOL status = SetThreadGroupAffinity_f(hThread, &affinity, nullptr); + const BOOL status = SetThreadGroupAffinity(hThread, &affinity, nullptr); if (status == 0) std::exit(EXIT_FAILURE); @@ -665,138 +857,6 @@ class NumaConfig { return true; } -#if defined(__linux__) && !defined(__ANDROID__) - - static std::set get_process_affinity() { - - std::set cpus; - - // For unsupported systems, or in case of a soft error, we may assume all processors - // are available for use. 
- [[maybe_unused]] auto set_to_all_cpus = [&]() { - for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) - cpus.insert(c); - }; - - // cpu_set_t by default holds 1024 entries. This may not be enough soon, - // but there is no easy way to determine how many threads there actually is. - // In this case we just choose a reasonable upper bound. - static constexpr CpuIndex MaxNumCpus = 1024 * 64; - - cpu_set_t* mask = CPU_ALLOC(MaxNumCpus); - if (mask == nullptr) - std::exit(EXIT_FAILURE); - - const size_t masksize = CPU_ALLOC_SIZE(MaxNumCpus); - - CPU_ZERO_S(masksize, mask); - - const int status = sched_getaffinity(0, masksize, mask); - - if (status != 0) - { - CPU_FREE(mask); - std::exit(EXIT_FAILURE); - } - - for (CpuIndex c = 0; c < MaxNumCpus; ++c) - if (CPU_ISSET_S(c, masksize, mask)) - cpus.insert(c); - - CPU_FREE(mask); - - return cpus; - } - -#elif defined(_WIN64) - - // On Windows there are two ways to set affinity, and therefore 2 ways to get it. - // These are not consistent, so we have to check both. - // In some cases it is actually not possible to determine affinity. - // For example when two different threads have affinity on different processor groups, - // set using SetThreadAffinityMask, we can't retrieve the actual affinities. - // From documentation on GetProcessAffinityMask: - // > If the calling process contains threads in multiple groups, - // > the function returns zero for both affinity masks. - // In such cases we just give up and assume we have affinity for all processors. 
- // nullopt means no affinity is set, that is, all processors are allowed - static std::optional> get_process_affinity() { - HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto GetThreadSelectedCpuSetMasks_f = GetThreadSelectedCpuSetMasks_t( - (void (*)()) GetProcAddress(k32, "GetThreadSelectedCpuSetMasks")); - auto GetProcessAffinityMask_f = - GetProcessAffinityMask_t((void (*)()) GetProcAddress(k32, "GetProcessAffinityMask")); - auto GetProcessGroupAffinity_f = - GetProcessGroupAffinity_t((void (*)()) GetProcAddress(k32, "GetProcessGroupAffinity")); - - if (GetThreadSelectedCpuSetMasks_f != nullptr) - { - std::set cpus; - - USHORT RequiredMaskCount; - GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), nullptr, 0, &RequiredMaskCount); - - // If RequiredMaskCount then these affinities were never set, but it's not consistent - // so GetProcessAffinityMask may still return some affinity. - if (RequiredMaskCount > 0) - { - auto groupAffinities = std::make_unique(RequiredMaskCount); - - GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(), - RequiredMaskCount, &RequiredMaskCount); - - for (USHORT i = 0; i < RequiredMaskCount; ++i) - { - const size_t procGroupIndex = groupAffinities[i].Group; - - for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) - { - if (groupAffinities[i].Mask & (KAFFINITY(1) << j)) - cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); - } - } - - return cpus; - } - } - - if (GetProcessAffinityMask_f != nullptr && GetProcessGroupAffinity_f != nullptr) - { - std::set cpus; - - DWORD_PTR proc, sys; - BOOL status = GetProcessAffinityMask_f(GetCurrentProcess(), &proc, &sys); - if (status == 0) - return std::nullopt; - - // We can't determine affinity because it spans processor groups. - if (proc == 0) - return std::nullopt; - - // We are expecting a single group. 
- USHORT GroupCount = 1; - alignas(4) USHORT GroupArray[1]; - status = GetProcessGroupAffinity_f(GetCurrentProcess(), &GroupCount, GroupArray); - if (status == 0 || GroupCount != 1) - return std::nullopt; - - const size_t procGroupIndex = GroupArray[0]; - - uint64_t mask = static_cast(proc); - for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) - { - if (mask & (KAFFINITY(1) << j)) - cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); - } - - return cpus; - } - - return std::nullopt; - } - -#endif - static std::vector indices_from_shortened_string(const std::string& s) { std::vector indices; From 21ba32af6d34c367ef22384c0f87fe49764d8ef0 Mon Sep 17 00:00:00 2001 From: mstembera Date: Tue, 4 Jun 2024 17:59:47 -0700 Subject: [PATCH 108/315] Remove m512_hadd128x16_interleave() This functionality is no longer used anywhere. closes https://github.com/official-stockfish/Stockfish/pull/5357 No functional change --- src/nnue/layers/simd.h | 33 --------------------------------- src/search.cpp | 2 +- src/search.h | 2 +- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/src/nnue/layers/simd.h b/src/nnue/layers/simd.h index cec41474..55cb7df1 100644 --- a/src/nnue/layers/simd.h +++ b/src/nnue/layers/simd.h @@ -43,39 +43,6 @@ namespace Stockfish::Simd { return _mm512_reduce_add_epi32(sum) + bias; } -/* - Parameters: - sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]] - sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]] - sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]] - sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]] - - Returns: - ret = [ - reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]), - reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]), - reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), 
reduce_add_epi32(zmm3.i128[2]), - reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3]) - ] - */ -[[maybe_unused]] static __m512i -m512_hadd128x16_interleave(__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) { - - __m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1); - __m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1); - - __m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3); - __m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3); - - __m512i sum01 = _mm512_add_epi32(sum01a, sum01b); - __m512i sum23 = _mm512_add_epi32(sum23a, sum23b); - - __m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23); - __m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23); - - return _mm512_add_epi32(sum0123a, sum0123b); -} - [[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) { #if defined(USE_VNNI) diff --git a/src/search.cpp b/src/search.cpp index c03fe781..6e03b62a 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1660,7 +1660,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, return bestValue; } -Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) { +Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) const { int reductionScale = reductions[d] * reductions[mn]; return (reductionScale + 1222 - delta * 733 / rootDelta) / 1024 + (!i && reductionScale > 1231); } diff --git a/src/search.h b/src/search.h index a22d3200..d5210c2e 100644 --- a/src/search.h +++ b/src/search.h @@ -265,7 +265,7 @@ class Worker { template Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0); - Depth reduction(bool i, Depth d, int mn, int delta); + Depth reduction(bool i, Depth d, int mn, int delta) const; // Get a pointer to the search manager, only allowed to be called by the // main thread. 
From a08fcacb2876ced0cb68d01e61f081449386f132 Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Tue, 4 Jun 2024 12:47:54 +0800 Subject: [PATCH 109/315] VVLTC search tune Parameters were tuned with 199k games of VVLTC: https://tests.stockfishchess.org/tests/view/665c67e73542f91ad1c54fe2 Passed VVLTC 1st sprt: https://tests.stockfishchess.org/tests/view/665e9c83fd45fb0f907c837c LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 83494 W: 21546 L: 21219 D: 40729 Ptnml(0-2): 6, 7707, 25993, 8036, 5 Passed VVLTC 2nd sprt: https://tests.stockfishchess.org/tests/view/665f650bfd45fb0f907cb360 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 151056 W: 38796 L: 38295 D: 73965 Ptnml(0-2): 5, 13742, 47536, 14237, 8 https://github.com/official-stockfish/Stockfish/pull/5359 Bench: 1154524 --- src/search.cpp | 94 +++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 6e03b62a..1c0bbc4d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -60,9 +60,9 @@ static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { - Value futilityMult = 129 - 43 * noTtCutNode; - Value improvingDeduction = 56 * improving * futilityMult / 32; - Value worseningDeduction = 336 * oppWorsening * futilityMult / 1024; + Value futilityMult = 124 - 43 * noTtCutNode; + Value improvingDeduction = 60 * improving * futilityMult / 32; + Value worseningDeduction = 344 * oppWorsening * futilityMult / 1024; return futilityMult * d - improvingDeduction - worseningDeduction; } @@ -74,15 +74,15 @@ constexpr int futility_move_count(bool improving, Depth depth) { // Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { auto cv = 
w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; - v += cv * std::abs(cv) / 5435; + v += cv * std::abs(cv) / 4990; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::clamp(205 * d - 283, 18, 1544); } +int stat_bonus(Depth d) { return std::clamp(186 * d - 285, 20, 1524); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return (d < 4 ? 767 * d - 275 : 1911); } +int stat_malus(Depth d) { return (d < 4 ? 707 * d - 260 : 2073); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } @@ -314,12 +314,12 @@ void Search::Worker::iterative_deepening() { // Reset aspiration window starting size Value avg = rootMoves[pvIdx].averageScore; - delta = 9 + avg * avg / 10502; + delta = 9 + avg * avg / 10182; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore (~4 Elo) - optimism[us] = 122 * avg / (std::abs(avg) + 92); + optimism[us] = 127 * avg / (std::abs(avg) + 86); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail @@ -500,17 +500,17 @@ void Search::Worker::clear() { counterMoves.fill(Move::none()); mainHistory.fill(0); captureHistory.fill(0); - pawnHistory.fill(-1300); + pawnHistory.fill(-1193); correctionHistory.fill(0); for (bool inCheck : {false, true}) for (StatsType c : {NoCaptures, Captures}) for (auto& to : continuationHistory[inCheck][c]) for (auto& h : to) - h->fill(-60); + h->fill(-56); for (size_t i = 1; i < reductions.size(); ++i) - reductions[i] = int((19.90 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + reductions[i] = int((19.26 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); refreshTable.clear(networks[numaAccessToken]); } @@ 
-742,7 +742,7 @@ Value Search::Worker::search( // Use static evaluation difference to improve quiet move ordering (~9 Elo) if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-11 * int((ss - 1)->staticEval + ss->staticEval), -1592, 1390); + int bonus = std::clamp(-10 * int((ss - 1)->staticEval + ss->staticEval), -1590, 1371); bonus = bonus > 0 ? 2 * bonus : bonus / 2; thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) @@ -764,7 +764,7 @@ Value Search::Worker::search( // Step 7. Razoring (~1 Elo) // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. - if (eval < alpha - 501 - 305 * depth * depth) + if (eval < alpha - 512 - 293 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) @@ -773,23 +773,23 @@ Value Search::Worker::search( // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. - if (!ss->ttPv && depth < 12 + if (!ss->ttPv && depth < 13 && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - - (ss - 1)->statScore / 248 + - (ss - 1)->statScore / 263 >= beta && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) return beta > VALUE_TB_LOSS_IN_MAX_PLY ? beta + (eval - beta) / 3 : eval; // Step 9. 
Null move search with verification search (~35 Elo) - if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 13999 - && eval >= beta && ss->staticEval >= beta - 21 * depth + 390 && !excludedMove + if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 14369 + && eval >= beta && ss->staticEval >= beta - 21 * depth + 393 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / 177, 6) + depth / 3 + 5; + Depth R = std::min(int(eval - beta) / 197, 6) + depth / 3 + 5; ss->currentMove = Move::null(); ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -847,7 +847,7 @@ Value Search::Worker::search( // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. - probCutBeta = beta + 185 - 60 * improving; + probCutBeta = beta + 177 - 57 * improving; if ( !PvNode && depth > 3 && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY @@ -903,7 +903,7 @@ Value Search::Worker::search( moves_loop: // When in check, search starts here // Step 12. 
A small Probcut idea, when we are in check (~4 Elo) - probCutBeta = beta + 361; + probCutBeta = beta + 388; if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 4 && ttValue >= probCutBeta && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) @@ -991,15 +991,15 @@ moves_loop: // When in check, search starts here // Futility pruning for captures (~2 Elo) if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { - Value futilityValue = ss->staticEval + 283 + 235 * lmrDepth + Value futilityValue = ss->staticEval + 287 + 248 * lmrDepth + PieceValue[capturedPiece] + captHist / 7; if (futilityValue <= alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - int seeHist = std::clamp(captHist / 32, -183 * depth, 162 * depth); - if (!pos.see_ge(move, -166 * depth - seeHist)) + int seeHist = std::clamp(captHist / 32, -180 * depth, 163 * depth); + if (!pos.see_ge(move, -160 * depth - seeHist)) continue; } else @@ -1010,18 +1010,18 @@ moves_loop: // When in check, search starts here + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) - if (lmrDepth < 6 && history < -4427 * depth) + if (lmrDepth < 6 && history < -4151 * depth) continue; history += 2 * thisThread->mainHistory[us][move.from_to()]; - lmrDepth += history / 3670; + lmrDepth += history / 3678; Value futilityValue = - ss->staticEval + (bestValue < ss->staticEval - 51 ? 149 : 55) + 141 * lmrDepth; + ss->staticEval + (bestValue < ss->staticEval - 51 ? 
138 : 54) + 140 * lmrDepth; // Futility pruning: parent node (~13 Elo) - if (!ss->inCheck && lmrDepth < 11 && futilityValue <= alpha) + if (!ss->inCheck && lmrDepth < 12 && futilityValue <= alpha) { if (bestValue <= futilityValue && std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && futilityValue < VALUE_TB_WIN_IN_MAX_PLY) @@ -1032,7 +1032,7 @@ moves_loop: // When in check, search starts here lmrDepth = std::max(lmrDepth, 0); // Prune moves with negative SEE (~4 Elo) - if (!pos.see_ge(move, -26 * lmrDepth * lmrDepth)) + if (!pos.see_ge(move, -24 * lmrDepth * lmrDepth)) continue; } } @@ -1055,11 +1055,11 @@ moves_loop: // When in check, search starts here // margins scale well. if (!rootNode && move == ttMove && !excludedMove - && depth >= 4 - (thisThread->completedDepth > 38) + ss->ttPv + && depth >= 4 - (thisThread->completedDepth > 35) + ss->ttPv && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - (58 + 64 * (ss->ttPv && !PvNode)) * depth / 64; + Value singularBeta = ttValue - (52 + 80 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1070,13 +1070,13 @@ moves_loop: // When in check, search starts here if (value < singularBeta) { - int doubleMargin = 304 * PvNode - 203 * !ttCapture; - int tripleMargin = 117 + 259 * PvNode - 296 * !ttCapture + 97 * ss->ttPv; + int doubleMargin = 290 * PvNode - 200 * !ttCapture; + int tripleMargin = 107 + 247 * PvNode - 278 * !ttCapture + 99 * ss->ttPv; extension = 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin); - depth += ((!PvNode) && (depth < 16)); + depth += ((!PvNode) && (depth < 18)); } // Multi-cut pruning @@ -1106,7 +1106,7 @@ moves_loop: // When in check, search starts here else if (PvNode && move.to_sq() == prevSq && thisThread->captureHistory[movedPiece][move.to_sq()] [type_of(pos.piece_on(move.to_sq()))] - > 3988) + > 3922) extension = 1; } @@ 
-1162,10 +1162,10 @@ moves_loop: // When in check, search starts here ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - 5169; + + (*contHist[1])[movedPiece][move.to_sq()] - 4747; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / 11049; + r -= ss->statScore / 11125; // Step 17. Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1 + rootNode) @@ -1184,7 +1184,7 @@ moves_loop: // When in check, search starts here { // Adjust full-depth search based on LMR results - if the result // was good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = value > (bestValue + 36 + 2 * newDepth); // (~1 Elo) + const bool doDeeperSearch = value > (bestValue + 35 + 2 * newDepth); // (~1 Elo) const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo) newDepth += doDeeperSearch - doShallowerSearch; @@ -1345,10 +1345,10 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (116 * (depth > 5) + 115 * (PvNode || cutNode) - + 186 * ((ss - 1)->statScore < -14144) + 121 * ((ss - 1)->moveCount > 9) - + 64 * (!ss->inCheck && bestValue <= ss->staticEval - 115) - + 137 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 81)); + int bonus = (113 * (depth > 5) + 118 * (PvNode || cutNode) + + 191 * ((ss - 1)->statScore < -14396) + 119 * ((ss - 1)->moveCount > 8) + + 64 * (!ss->inCheck && bestValue <= ss->staticEval - 107) + + 147 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 75)); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus / 100); thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] @@ -1520,7 +1520,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, 
Value beta, if (bestValue > alpha) alpha = bestValue; - futilityBase = ss->staticEval + 279; + futilityBase = ss->staticEval + 294; } const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -1592,11 +1592,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)] [move.to_sq()] - <= 4181) + <= 4452) continue; // Do not search moves with bad enough SEE values (~5 Elo) - if (!pos.see_ge(move, -67)) + if (!pos.see_ge(move, -74)) continue; } @@ -1662,7 +1662,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) const { int reductionScale = reductions[d] * reductions[mn]; - return (reductionScale + 1222 - delta * 733 / rootDelta) / 1024 + (!i && reductionScale > 1231); + return (reductionScale + 1236 - delta * 746 / rootDelta) / 1024 + (!i && reductionScale > 1326); } // elapsed() returns the time elapsed since the search started. If the @@ -1765,7 +1765,7 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - int bestMoveBonus = bestValue > beta + 176 ? quietMoveBonus // larger bonus + int bestMoveBonus = bestValue > beta + 164 ? quietMoveBonus // larger bonus : stat_bonus(depth); // smaller bonus update_quiet_stats(pos, ss, workerThread, bestMove, bestMoveBonus); @@ -1803,7 +1803,7 @@ void update_all_stats(const Position& pos, // by moves at ply -1, -2, -3, -4, and -6 with current move. 
void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - bonus = bonus * 47 / 64; + bonus = bonus * 51 / 64; for (int i : {1, 2, 3, 4, 6}) { From 36eb9bc783d35842571d0d4313349b964892d9ca Mon Sep 17 00:00:00 2001 From: Viren6 <94880762+Viren6@users.noreply.github.com> Date: Wed, 5 Jun 2024 03:24:39 +0100 Subject: [PATCH 110/315] Use futility margin in razoring margin Uses futilityMargin * depth to set the razoring margin. This retains the quadratic depth scaling to preserve mate finding capabilities. This patch is nice because it increases the elo sensitivity of the futility margin heuristics. Passed STC: https://tests.stockfishchess.org/tests/view/665f9892fd11ae7170b4849c LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 39392 W: 10348 L: 10030 D: 19014 Ptnml(0-2): 99, 4585, 10009, 4905, 98 Passed LTC: https://tests.stockfishchess.org/tests/view/665f9d2dfd11ae7170b484a8 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 107910 W: 27521 L: 27053 D: 53336 Ptnml(0-2): 73, 11835, 29670, 12305, 72 closes https://github.com/official-stockfish/Stockfish/pull/5360 bench 1277173 --- src/search.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 1c0bbc4d..15cc2d8f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -60,9 +60,9 @@ static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { - Value futilityMult = 124 - 43 * noTtCutNode; - Value improvingDeduction = 60 * improving * futilityMult / 32; - Value worseningDeduction = 344 * oppWorsening * futilityMult / 1024; + Value futilityMult = 109 - 40 * noTtCutNode; + Value improvingDeduction = 59 * improving * futilityMult / 32; + Value worseningDeduction = 328 * oppWorsening * futilityMult / 1024; return futilityMult * d - improvingDeduction - worseningDeduction; } @@ -554,7 +554,7 @@ Value Search::Worker::search( bool 
givesCheck, improving, priorCapture, opponentWorsening; bool capture, moveCountPruning, ttCapture; Piece movedPiece; - int moveCount, captureCount, quietCount; + int moveCount, captureCount, quietCount, futilityMargin; Bound singularBound; // Step 1. Initialize node @@ -761,10 +761,12 @@ Value Search::Worker::search( opponentWorsening = ss->staticEval + (ss - 1)->staticEval > 2; + futilityMargin = futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening); + // Step 7. Razoring (~1 Elo) // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. - if (eval < alpha - 512 - 293 * depth * depth) + if (eval < alpha - 465 - futilityMargin * depth * 33 / 32) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) @@ -774,9 +776,7 @@ Value Search::Worker::search( // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. if (!ss->ttPv && depth < 13 - && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - - (ss - 1)->statScore / 263 - >= beta + && eval - futilityMargin - (ss - 1)->statScore / 263 >= beta && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) return beta > VALUE_TB_LOSS_IN_MAX_PLY ? beta + (eval - beta) / 3 : eval; From fb18caae7a7906a6c9a0579c43021221c663965a Mon Sep 17 00:00:00 2001 From: Disservin Date: Wed, 5 Jun 2024 18:31:11 +0200 Subject: [PATCH 111/315] Update clang-format to version 18 clang-format-18 is available in ubuntu noble(24.04), if you are on a version lower than that you can use the update script from llvm. https://apt.llvm.org/ Windows users should be able to download and use clang-format from their release builds https://github.com/llvm/llvm-project/releases or get the latest from msys2 https://packages.msys2.org/package/mingw-w64-x86_64-clang. macOS users can resort to "brew install clang-format". 
closes https://github.com/official-stockfish/Stockfish/pull/5365 No functional change --- .github/workflows/clang-format.yml | 6 +++--- CONTRIBUTING.md | 2 +- src/Makefile | 4 ++-- src/search.cpp | 10 +++++----- src/thread.cpp | 2 +- src/tune.cpp | 3 +-- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml index e20e0d5d..630edbf9 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -25,7 +25,7 @@ jobs: id: clang-format continue-on-error: true with: - clang-format-version: "17" + clang-format-version: "18" exclude-regex: "incbin" - name: Comment on PR @@ -33,9 +33,9 @@ jobs: uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0 with: message: | - clang-format 17 needs to be run on this PR. + clang-format 18 needs to be run on this PR. If you do not have clang-format installed, the maintainer will run it when merging. - For the exact version please see https://packages.ubuntu.com/mantic/clang-format-17. + For the exact version please see https://packages.ubuntu.com/noble/clang-format-18. _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_ comment_tag: execution diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cf9cecda..caffc916 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -59,7 +59,7 @@ discussion._ Changes to Stockfish C++ code should respect our coding style defined by [.clang-format](.clang-format). You can format your changes by running -`make format`. This requires clang-format version 17 to be installed on your system. +`make format`. This requires clang-format version 18 to be installed on your system. 
## Navigate diff --git a/src/Makefile b/src/Makefile index 29c4f879..742fd195 100644 --- a/src/Makefile +++ b/src/Makefile @@ -153,8 +153,8 @@ dotprod = no arm_version = 0 STRIP = strip -ifneq ($(shell which clang-format-17 2> /dev/null),) - CLANG-FORMAT = clang-format-17 +ifneq ($(shell which clang-format-18 2> /dev/null),) + CLANG-FORMAT = clang-format-18 else CLANG-FORMAT = clang-format endif diff --git a/src/search.cpp b/src/search.cpp index 15cc2d8f..2cbc7677 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -579,9 +579,10 @@ Value Search::Worker::search( // Step 2. Check for aborted search and immediate draw if (threads.stop.load(std::memory_order_relaxed) || pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) - return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate( - networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]) - : value_draw(thisThread->nodes); + return (ss->ply >= MAX_PLY && !ss->inCheck) + ? evaluate(networks[numaAccessToken], pos, refreshTable, + thisThread->optimism[us]) + : value_draw(thisThread->nodes); // Step 3. Mate distance pruning. Even if we mate at the next move our score // would be at best mate_in(ss->ply + 1), but if alpha is already bigger because @@ -775,8 +776,7 @@ Value Search::Worker::search( // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. - if (!ss->ttPv && depth < 13 - && eval - futilityMargin - (ss - 1)->statScore / 263 >= beta + if (!ss->ttPv && depth < 13 && eval - futilityMargin - (ss - 1)->statScore / 263 >= beta && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) return beta > VALUE_TB_LOSS_IN_MAX_PLY ? beta + (eval - beta) / 3 : eval; diff --git a/src/thread.cpp b/src/thread.cpp index 0a33422a..4acb9854 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -179,7 +179,7 @@ void ThreadPool::set(const NumaConfig& numaConfig, const size_t threadId = threads.size(); const NumaIndex numaId = doBindThreads ? 
boundThreadToNumaNode[threadId] : 0; auto manager = threadId == 0 ? std::unique_ptr( - std::make_unique(updateContext)) + std::make_unique(updateContext)) : std::make_unique(); // When not binding threads we want to force all access to happen diff --git a/src/tune.cpp b/src/tune.cpp index 3e5ebe5e..f377e59e 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -118,7 +118,6 @@ void Tune::Entry::read_option() { namespace Stockfish { -void Tune::read_results() { /* ...insert your values here... */ -} +void Tune::read_results() { /* ...insert your values here... */ } } // namespace Stockfish From 5688b188cc8560e107815c83a7084220fddebdb9 Mon Sep 17 00:00:00 2001 From: cj5716 <125858804+cj5716@users.noreply.github.com> Date: Fri, 31 May 2024 21:55:39 +0800 Subject: [PATCH 112/315] Simplify evaluation constants Passed STC (<0, 2> by accident): LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 346016 W: 89529 L: 88756 D: 167731 Ptnml(0-2): 1012, 41074, 88027, 41919, 976 https://tests.stockfishchess.org/tests/view/6659d6ecf426908fcc6b6929 Passed LTC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 89862 W: 22887 L: 22734 D: 44241 Ptnml(0-2): 45, 9999, 24694, 10144, 49 https://tests.stockfishchess.org/tests/view/665a6ebb062b2c3cf814fde8 Passed LTC (Rebased): LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 325500 W: 82734 L: 82826 D: 159940 Ptnml(0-2): 193, 36409, 89665, 36263, 220 https://tests.stockfishchess.org/tests/view/665bd39f44e8416a9cdc1909 closes https://github.com/official-stockfish/Stockfish/pull/5361 Bench 961982 --- src/evaluate.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index afba6363..fdf35eb1 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -83,10 +83,8 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, optimism += optimism * nnueComplexity / 470; nnue -= nnue * nnueComplexity / 20000; - int material = 300 * pos.count() + 350 * pos.count() + 400 * pos.count() - + 640 * pos.count() + 1200 * 
pos.count(); - - v = (nnue * (34300 + material) + optimism * (4400 + material)) / 36672; + int material = 600 * pos.count() + pos.non_pawn_material(); + v = (nnue * (68600 + material) + optimism * (8800 + material)) / 73344; // Damp down the evaluation linearly when shuffling v -= v * pos.rule50_count() / 212; From e6c83beed12a6d3d17c69bea4bcf1a397bc60c86 Mon Sep 17 00:00:00 2001 From: R-Goc Date: Tue, 4 Jun 2024 18:06:14 +0200 Subject: [PATCH 113/315] Change PGO type for clang Change type of PGO in clang to IR which is recommended by LLVM/clang and could result in a speedup. https://github.com/llvm/llvm-project/issues/45668 closes https://github.com/official-stockfish/Stockfish/pull/5355 No functional change --- src/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index 742fd195..7142b972 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1051,14 +1051,14 @@ FORCE: clang-profile-make: $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-fprofile-instr-generate ' \ - EXTRALDFLAGS=' -fprofile-instr-generate' \ + EXTRACXXFLAGS='-fprofile-generate ' \ + EXTRALDFLAGS=' -fprofile-generate' \ all clang-profile-use: $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ + EXTRACXXFLAGS='-fprofile-use=stockfish.profdata' \ EXTRALDFLAGS='-fprofile-use ' \ all From 66ed4312f22a951aaa01bbb87b2d730656b8f2c1 Mon Sep 17 00:00:00 2001 From: Disservin Date: Fri, 7 Jun 2024 18:40:47 +0200 Subject: [PATCH 114/315] Workaround the clang-format inconsistencies closes https://github.com/official-stockfish/Stockfish/pull/5378 No functional change --- src/nnue/nnue_misc.cpp | 10 +++++----- src/tune.cpp | 7 +++++-- src/uci.cpp | 5 +++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp index 7585cce5..122610a7 100644 --- a/src/nnue/nnue_misc.cpp +++ 
b/src/nnue/nnue_misc.cpp @@ -178,16 +178,16 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) { - ss << "| " << bucket << " "; - ss << " | "; + ss << "| " << bucket << " " // + << " | "; format_cp_aligned_dot(t.psqt[bucket], ss, pos); - ss << " " + ss << " " // << " | "; format_cp_aligned_dot(t.positional[bucket], ss, pos); - ss << " " + ss << " " // << " | "; format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos); - ss << " " + ss << " " // << " |"; if (bucket == t.correctBucket) ss << " <-- this bucket is used"; diff --git a/src/tune.cpp b/src/tune.cpp index f377e59e..94c9b53e 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -58,8 +58,11 @@ void make_option(OptionsMap* options, const string& n, int v, const SetRange& r) LastOption = &((*options)[n]); // Print formatted parameters, ready to be copy-pasted in Fishtest - std::cout << n << "," << v << "," << r(v).first << "," << r(v).second << "," - << (r(v).second - r(v).first) / 20.0 << "," + std::cout << n << "," // + << v << "," // + << r(v).first << "," // + << r(v).second << "," // + << (r(v).second - r(v).first) / 20.0 << "," // << "0.0020" << std::endl; } } diff --git a/src/uci.cpp b/src/uci.cpp index 43b0e005..42c69cde 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -324,8 +324,9 @@ void UCIEngine::bench(std::istream& args) { dbg_print(); - std::cerr << "\n===========================" - << "\nTotal time (ms) : " << elapsed << "\nNodes searched : " << nodes + std::cerr << "\n===========================" // + << "\nTotal time (ms) : " << elapsed // + << "\nNodes searched : " << nodes // << "\nNodes/second : " << 1000 * nodes / elapsed << std::endl; // reset callback, to not capture a dangling reference to nodesSearched From 5dda4037c73ead63b145c9a77f1dbb41422e058f Mon Sep 17 00:00:00 2001 From: rn5f107s2 Date: Wed, 5 Jun 2024 18:56:25 +0200 Subject: [PATCH 115/315] Simplify razor changes Remove razoring 
changes from https://github.com/official-stockfish/Stockfish/pull/5360 The mentioned patch introduced the usage of futility_margin into razoring alongside a tune to futility_margin. It seems the elo gained in this patch comes from the tune of futility_margin and not the introduction of futility_margin to razoring, so simplify it away here. Passed Non-regression STC: https://tests.stockfishchess.org/tests/view/66606581c340c8eed7757bc8 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 169056 W: 43922 L: 43848 D: 81286 Ptnml(0-2): 438, 20288, 43034, 20298, 470 Passed Non-regression LTC: https://tests.stockfishchess.org/tests/view/66607764c340c8eed7757c58 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 157134 W: 39805 L: 39723 D: 77606 Ptnml(0-2): 74, 17444, 43461, 17502, 86 Passed rebased Non-regression LTC: https://tests.stockfishchess.org/tests/view/6660c696c340c8eed77580c0 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 135984 W: 34427 L: 34324 D: 67233 Ptnml(0-2): 67, 15063, 37615, 15194, 53 closes https://github.com/official-stockfish/Stockfish/pull/5366 Bench: 1150518 --- src/search.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 2cbc7677..e0a49dba 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -554,7 +554,7 @@ Value Search::Worker::search( bool givesCheck, improving, priorCapture, opponentWorsening; bool capture, moveCountPruning, ttCapture; Piece movedPiece; - int moveCount, captureCount, quietCount, futilityMargin; + int moveCount, captureCount, quietCount; Bound singularBound; // Step 1. Initialize node @@ -762,12 +762,10 @@ Value Search::Worker::search( opponentWorsening = ss->staticEval + (ss - 1)->staticEval > 2; - futilityMargin = futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening); - // Step 7. Razoring (~1 Elo) // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. 
- if (eval < alpha - 465 - futilityMargin * depth * 33 / 32) + if (eval < alpha - 512 - 293 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) @@ -776,7 +774,10 @@ Value Search::Worker::search( // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. - if (!ss->ttPv && depth < 13 && eval - futilityMargin - (ss - 1)->statScore / 263 >= beta + if (!ss->ttPv && depth < 13 + && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) + - (ss - 1)->statScore / 263 + >= beta && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) return beta > VALUE_TB_LOSS_IN_MAX_PLY ? beta + (eval - beta) / 3 : eval; From e2be0aaf67569788f0d1e726d0a86ce1604958da Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Thu, 6 Jun 2024 13:07:45 +0300 Subject: [PATCH 116/315] Tweak pruning formula Tweak pruning formula, including a constant. I started from an old yellow patch, if I'm not mistaken by Viz (Unfortunately I lost the link) where he tried something similar. I worked on it, trying different variations, until I came up with a good configuration to pass. 
Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 213120 W: 55351 L: 54778 D: 102991 Ptnml(0-2): 572, 25209, 54437, 25758, 584 https://tests.stockfishchess.org/tests/view/6660c9a7c340c8eed7758195 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 315324 W: 80176 L: 79284 D: 155864 Ptnml(0-2): 155, 34711, 87030, 35619, 147 https://tests.stockfishchess.org/tests/view/6660d7bb6489614cdad13d66 closes https://github.com/official-stockfish/Stockfish/pull/5370 Bench: 1231853 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index e0a49dba..7417a4b6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1580,7 +1580,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, // If static exchange evaluation is much worse than what is needed to not // fall below alpha we can prune this move. - if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 4)) + if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 2 - 30)) { bestValue = alpha; continue; From f55239b2f374a2f98717e7c361732f7c4510388b Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Thu, 6 Jun 2024 12:47:24 +0200 Subject: [PATCH 117/315] NumaPolicy fixes and robustness improvements 1. Fix GetProcessGroupAffinity still not getting properly aligned memory sometimes. 2. Fix a very theoretically possible heap corruption if GetActiveProcessorGroupCount changes between calls. 3. Fully determine affinity on Windows 11 and Windows Server 2022. It should only ever be indeterminate in case of an error. 4. Separate isDeterminate for old and new API, as they are &'d together we still can end up with a subset of processors even if one API is indeterminate. 5. likely_used_old_api() that is based on actual affinity that's been detected 6. IMPORTANT: Gather affinities at startup, so that we only later use the affinities set at startup.
Not only does this prevent us from our own calls interfering with detection but it also means subsequent setoption NumaPolicy calls should behave as expected. 7. Fix ERROR_INSUFFICIENT_BUFFER from GetThreadSelectedCpuSetMasks being treated like an error. Should resolve https://github.com/vondele/Stockfish/commit/02ff76630b358e5f958793cc93df0009d2da65a5#commitcomment-142790025 closes https://github.com/official-stockfish/Stockfish/pull/5372 Bench: 1231853 --- src/numa.h | 284 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 203 insertions(+), 81 deletions(-) diff --git a/src/numa.h b/src/numa.h index c170c178..fd9abd4d 100644 --- a/src/numa.h +++ b/src/numa.h @@ -35,6 +35,8 @@ #include #include +#include "memory.h" + // We support linux very well, but we explicitly do NOT support Android, because there's // no affected systems, not worth maintaining. #if defined(__linux__) && !defined(__ANDROID__) @@ -96,15 +98,15 @@ inline const CpuIndex SYSTEM_THREADS_NB = std::max(1, get_hardware_con struct WindowsAffinity { std::optional> oldApi; std::optional> newApi; - bool isDeterminate = true; + + // We also provide diagnostic for when the affinity is set to nullopt + // whether it was due to being indeterminate. If affinity is indeterminate + // it's best to assume it is not set at all, so consistent with the meaning + // of the nullopt affinity. + bool isNewDeterminate = true; + bool isOldDeterminate = true; std::optional> get_combined() const { - // When the affinity is not determinate we treat it as no affinity, - // because otherwise we would have to set affinity to fewer - // processors than we currently have affinity to. 
- if (!isDeterminate) - return std::nullopt; - if (!oldApi.has_value()) return newApi; if (!newApi.has_value()) @@ -115,47 +117,53 @@ struct WindowsAffinity { std::inserter(intersect, intersect.begin())); return intersect; } + + // Since Windows 11 and Windows Server 2022 thread affinities can span + // processor groups and can be set as such by a new WinAPI function. + // However, we may need to force using the old API if we detect + // that the process has affinity set by the old API already and we want to override that. + // Due to the limitations of the old API we can't detect its use reliably. + // There will be cases where we detect not use but it has actually been used and vice versa. + bool likely_used_old_api() const { return oldApi.has_value() || !isOldDeterminate; } }; inline std::pair> get_process_group_affinity() { - WORD numProcGroups = GetActiveProcessorGroupCount(); - // GetProcessGroupAffinity requires the GroupArray argument to be // aligned to 4 bytes instead of just 2. static constexpr size_t GroupArrayMinimumAlignment = 4; static_assert(GroupArrayMinimumAlignment >= alignof(USHORT)); - auto GroupArray = std::make_unique( - numProcGroups + (GroupArrayMinimumAlignment / alignof(USHORT) - 1)); + // The function should succeed the second time, but it may fail if the group + // affinity has changed between GetProcessGroupAffinity calls. + // In such case we consider this a hard error, as we can't work with unstable affinities + // anyway. 
+ static constexpr int MAX_TRIES = 2; + USHORT GroupCount = 1; + for (int i = 0; i < MAX_TRIES; ++i) + { + auto GroupArray = std::make_unique( + GroupCount + (GroupArrayMinimumAlignment / alignof(USHORT) - 1)); - USHORT GroupCount = static_cast(numProcGroups); - const BOOL status = GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, GroupArray.get()); + USHORT* GroupArrayAligned = align_ptr_up(GroupArray.get()); - return std::make_pair(status, std::vector(GroupArray.get(), GroupArray.get() + GroupCount)); + const BOOL status = + GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, GroupArrayAligned); + + if (status == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) + { + break; + } + + if (status != 0) + { + return std::make_pair(status, + std::vector(GroupArrayAligned, GroupArrayAligned + GroupCount)); + } + } + + return std::make_pair(0, std::vector()); } -// Since Windows 11 and Windows Server 2022 thread affinities can span -// processor groups and can be set as such by a new WinAPI function. -// However, we may need to force using the old API if we detect -// that the process has affinity set by the old API already and we want to override that. -inline bool use_old_affinity_api() { - HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t( - (void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks")); - - if (SetThreadSelectedCpuSetMasks_f == nullptr) - return true; - - auto [status, groupAffinity] = get_process_group_affinity(); - - // If GroupCount > 1 then we know old API was never used and we can stick - // to the new API safely. - if (status != 0 && groupAffinity.size() > 1) - return false; - - return true; -}; - // On Windows there are two ways to set affinity, and therefore 2 ways to get it. // These are not consistent, so we have to check both. // In some cases it is actually not possible to determine affinity. 
@@ -171,83 +179,183 @@ inline WindowsAffinity get_process_affinity() { auto GetThreadSelectedCpuSetMasks_f = GetThreadSelectedCpuSetMasks_t( (void (*)()) GetProcAddress(k32, "GetThreadSelectedCpuSetMasks")); + BOOL status = 0; + WindowsAffinity affinity; if (GetThreadSelectedCpuSetMasks_f != nullptr) { USHORT RequiredMaskCount; - BOOL status = - GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), nullptr, 0, &RequiredMaskCount); + status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), nullptr, 0, &RequiredMaskCount); - // If RequiredMaskCount then these affinities were never set, but it's not consistent - // so GetProcessAffinityMask may still return some affinity. - if (status == 0) + // We expect ERROR_INSUFFICIENT_BUFFER from GetThreadSelectedCpuSetMasks, + // but other failure is an actual error. + if (status == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { - affinity.isDeterminate = false; - return affinity; + affinity.isNewDeterminate = false; } - - if (RequiredMaskCount > 0) + else if (RequiredMaskCount > 0) { - std::set cpus; - + // If RequiredMaskCount then these affinities were never set, but it's not consistent + // so GetProcessAffinityMask may still return some affinity. 
auto groupAffinities = std::make_unique(RequiredMaskCount); - GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(), - RequiredMaskCount, &RequiredMaskCount); + status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(), + RequiredMaskCount, &RequiredMaskCount); - for (USHORT i = 0; i < RequiredMaskCount; ++i) + if (status == 0) { - const size_t procGroupIndex = groupAffinities[i].Group; - - for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) - { - if (groupAffinities[i].Mask & (KAFFINITY(1) << j)) - cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); - } + affinity.isNewDeterminate = false; } + else + { + std::set cpus; - affinity.newApi = std::move(cpus); + for (USHORT i = 0; i < RequiredMaskCount; ++i) + { + const size_t procGroupIndex = groupAffinities[i].Group; + + for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) + { + if (groupAffinities[i].Mask & (KAFFINITY(1) << j)) + cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); + } + } + + affinity.newApi = std::move(cpus); + } } } + // NOTE: There is no way to determine full affinity using the old API if + // individual threads set affinity on different processor groups. + DWORD_PTR proc, sys; - BOOL status = GetProcessAffinityMask(GetCurrentProcess(), &proc, &sys); + status = GetProcessAffinityMask(GetCurrentProcess(), &proc, &sys); // If proc == 0 then we can't determine affinity because it spans processor groups. + // On Windows 11 and Server 2022 it will instead + // > If, however, hHandle specifies a handle to the current process, the function + // > always uses the calling thread's primary group (which by default is the same + // > as the process' primary group) in order to set the + // > lpProcessAffinityMask and lpSystemAffinityMask. + // So it will never be indeterminate here. We can only make assumptions later. 
if (status == 0 || proc == 0) { - affinity.isDeterminate = false; + affinity.isOldDeterminate = false; return affinity; } // If SetProcessAffinityMask was never called the affinity // must span all processor groups, but if it was called it must only span one. - auto [status2, groupAffinity] = get_process_group_affinity(); - if (status2 == 0) + std::vector groupAffinity; // We need to capture this later and capturing + // from structured bindings requires c++20. + std::tie(status, groupAffinity) = get_process_group_affinity(); + if (status == 0) { - affinity.isDeterminate = false; + affinity.isOldDeterminate = false; return affinity; } - // If we have affinity for more than 1 group then at this point we - // can assume SetProcessAffinityMask has never been called and therefore - // according ot old API we do not have any affinity set. - // Otherwise we have to assume we have affinity set and gather the processor IDs. if (groupAffinity.size() == 1) { - std::set cpus; - - const size_t procGroupIndex = groupAffinity[0]; - - uint64_t mask = static_cast(proc); - for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) + // We detect the case when affinity is set to all processors and correctly + // leave affinity.oldApi as nullopt. + if (GetActiveProcessorGroupCount() != 1 || proc != sys) { - if (mask & (KAFFINITY(1) << j)) - cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); - } + std::set cpus; - affinity.oldApi = std::move(cpus); + const size_t procGroupIndex = groupAffinity[0]; + + const uint64_t mask = static_cast(proc); + for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) + { + if (mask & (KAFFINITY(1) << j)) + cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); + } + + affinity.oldApi = std::move(cpus); + } + } + else + { + // If we got here it means that either SetProcessAffinityMask was never set + // or we're on Windows 11/Server 2022. 
+ + // Since Windows 11 and Windows Server 2022 the behaviour of GetProcessAffinityMask changed + // > If, however, hHandle specifies a handle to the current process, the function + // > always uses the calling thread's primary group (which by default is the same + // > as the process' primary group) in order to set the + // > lpProcessAffinityMask and lpSystemAffinityMask. + // In which case we can actually retrieve the full affinity. + + if (GetThreadSelectedCpuSetMasks_f != nullptr) + { + std::thread th([&]() { + std::set cpus; + bool isAffinityFull = true; + + for (auto procGroupIndex : groupAffinity) + { + const int numActiveProcessors = + GetActiveProcessorCount(static_cast(procGroupIndex)); + + // We have to schedule to 2 different processors and & the affinities we get. + // Otherwise our processor choice could influence the resulting affinity. + // We assume the processor IDs within the group are filled sequentially from 0. + uint64_t procCombined = std::numeric_limits::max(); + uint64_t sysCombined = std::numeric_limits::max(); + + for (int i = 0; i < std::min(numActiveProcessors, 2); ++i) + { + GROUP_AFFINITY GroupAffinity; + std::memset(&GroupAffinity, 0, sizeof(GROUP_AFFINITY)); + GroupAffinity.Group = static_cast(procGroupIndex); + + GroupAffinity.Mask = static_cast(1) << i; + + status = + SetThreadGroupAffinity(GetCurrentThread(), &GroupAffinity, nullptr); + if (status == 0) + { + affinity.isOldDeterminate = false; + return; + } + + SwitchToThread(); + + DWORD_PTR proc2, sys2; + status = GetProcessAffinityMask(GetCurrentProcess(), &proc2, &sys2); + if (status == 0) + { + affinity.isOldDeterminate = false; + return; + } + + procCombined &= static_cast(proc2); + sysCombined &= static_cast(sys2); + } + + if (procCombined != sysCombined) + isAffinityFull = false; + + for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) + { + if (procCombined & (KAFFINITY(1) << j)) + cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); + } + } + + // We have to 
detect the case where the affinity was not set, or is set to all processors + // so that we correctly produce as std::nullopt result. + if (!isAffinityFull) + { + affinity.oldApi = std::move(cpus); + } + }); + + th.join(); + } } return affinity; @@ -300,6 +408,18 @@ inline std::set get_process_affinity() { #endif +#if defined(__linux__) && !defined(__ANDROID__) + +inline static const auto STARTUP_PROCESSOR_AFFINITY = get_process_affinity(); + +#elif defined(_WIN64) + +inline static const auto STARTUP_PROCESSOR_AFFINITY = get_process_affinity(); +inline static const auto STARTUP_USE_OLD_AFFINITY_API = + STARTUP_PROCESSOR_AFFINITY.likely_used_old_api(); + +#endif + // We want to abstract the purpose of storing the numa node index somewhat. // Whoever is using this does not need to know the specifics of the replication // machinery to be able to access NUMA replicated memory. @@ -326,6 +446,8 @@ class NumaReplicatedAccessToken { // It is guaranteed that NUMA nodes are NOT empty, i.e. every node exposed by NumaConfig // has at least one processor assigned. // +// We use startup affinities so as not to modify its own behaviour in time. +// // Until Stockfish doesn't support exceptions all places where an exception should be thrown // are replaced by std::exit. class NumaConfig { @@ -349,7 +471,7 @@ class NumaConfig { std::set allowedCpus; if (respectProcessAffinity) - allowedCpus = get_process_affinity(); + allowedCpus = STARTUP_PROCESSOR_AFFINITY; auto is_cpu_allowed = [respectProcessAffinity, &allowedCpus](CpuIndex c) { return !respectProcessAffinity || allowedCpus.count(c) == 1; @@ -414,7 +536,7 @@ class NumaConfig { std::optional> allowedCpus; if (respectProcessAffinity) - allowedCpus = get_process_affinity().get_combined(); + allowedCpus = STARTUP_PROCESSOR_AFFINITY.get_combined(); // The affinity can't be determined in all cases on Windows, but we at least guarantee // that the number of allowed processors is >= number of processors in the affinity mask. 
@@ -451,7 +573,7 @@ class NumaConfig { // still no way to set thread affinity spanning multiple processor groups. // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support // We also do this is if need to force old API for some reason. - if (use_old_affinity_api()) + if (STARTUP_USE_OLD_AFFINITY_API) { NumaConfig splitCfg = empty(); @@ -733,7 +855,7 @@ class NumaConfig { } // Sometimes we need to force the old API, but do not use it unless necessary. - if (SetThreadSelectedCpuSetMasks_f == nullptr || use_old_affinity_api()) + if (SetThreadSelectedCpuSetMasks_f == nullptr || STARTUP_USE_OLD_AFFINITY_API) { // On earlier windows version (since windows 7) we can't run a single thread // on multiple processor groups, so we need to restrict the group. From 7d4ffa175c52a425c6ebc19737586baf93f5b6ff Mon Sep 17 00:00:00 2001 From: Dubslow Date: Mon, 3 Jun 2024 17:47:03 -0500 Subject: [PATCH 118/315] Remove delta from evaluation Passed STC: https://tests.stockfishchess.org/tests/view/6660e49c6489614cdad14e29 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 188768 W: 48907 L: 48854 D: 91007 Ptnml(0-2): 584, 22571, 48005, 22656, 568 Passed LTC: https://tests.stockfishchess.org/tests/view/6660ff9791e372763104b38c LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 310680 W: 78651 L: 78727 D: 153302 Ptnml(0-2): 180, 34818, 85433, 34716, 193 closes https://github.com/official-stockfish/Stockfish/pull/5373 Bench: 1214575 --- src/evaluate.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index fdf35eb1..1317a01e 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -47,7 +47,7 @@ int Eval::simple_eval(const Position& pos, Color c) { bool Eval::use_smallnet(const Position& pos) { int simpleEval = simple_eval(pos, pos.side_to_move()); - return std::abs(simpleEval) > 992; + return std::abs(simpleEval) > 962; } // Evaluate is the evaluator for the outer world. 
It returns a static evaluation @@ -66,25 +66,24 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, &caches.small) : networks.big.evaluate(pos, &caches.big); - constexpr int delta = 3; - Value nnue = ((128 - delta) * psqt + (128 + delta) * positional) / 128; - int nnueComplexity = std::abs(psqt - positional); + Value nnue = psqt + positional; + int nnueComplexity = std::abs(psqt - positional); // Re-evaluate the position when higher eval accuracy is worth the time spent - if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 250)) + if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 227)) { std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big); - nnue = ((128 - delta) * psqt + (128 + delta) * positional) / 128; + nnue = psqt + positional; nnueComplexity = std::abs(psqt - positional); smallNet = false; } // Blend optimism and eval with nnue complexity - optimism += optimism * nnueComplexity / 470; - nnue -= nnue * nnueComplexity / 20000; + optimism += optimism * nnueComplexity / 457; + nnue -= nnue * nnueComplexity / 19157; - int material = 600 * pos.count() + pos.non_pawn_material(); - v = (nnue * (68600 + material) + optimism * (8800 + material)) / 73344; + int material = 554 * pos.count() + pos.non_pawn_material(); + v = (nnue * (73921 + material) + optimism * (8112 + material)) / 73260; // Damp down the evaluation linearly when shuffling v -= v * pos.rule50_count() / 212; From 1c67b46caf91a0e6277967ea9a7e4b2f6afbc971 Mon Sep 17 00:00:00 2001 From: Dubslow Date: Thu, 6 Jun 2024 13:10:30 -0500 Subject: [PATCH 119/315] Linearize corrHist Passed STC: https://tests.stockfishchess.org/tests/view/6661fff88dd8f31ed3c5d819 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 17504 W: 4651 L: 4406 D: 8447 Ptnml(0-2): 71, 1975, 4384, 2282, 40 Passed LTC: https://tests.stockfishchess.org/tests/view/666205b48dd8f31ed3c61296 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 24522 W: 6313 L: 
6094 D: 12115 Ptnml(0-2): 14, 2643, 6726, 2866, 12 closes https://github.com/official-stockfish/Stockfish/pull/5374 Bench: 1237729 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 7417a4b6..06adc92a 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -74,7 +74,7 @@ constexpr int futility_move_count(bool improving, Depth depth) { // Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; - v += cv * std::abs(cv) / 4990; + v += cv / 10; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } From 4151c06b744a3145617200ca8f76285aae193dc2 Mon Sep 17 00:00:00 2001 From: evqsx <149484438+evqsx@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:43:55 +0800 Subject: [PATCH 120/315] Remove the correction history bonus in null move search Passed STC: https://tests.stockfishchess.org/tests/view/666168e191e372763104c664 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 94848 W: 24708 L: 24550 D: 45590 Ptnml(0-2): 289, 11355, 24033, 11403, 344 Passed LTC: https://tests.stockfishchess.org/tests/view/6661e73591e372763104c751 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 70452 W: 17849 L: 17679 D: 34924 Ptnml(0-2): 27, 7707, 19596, 7861, 35 closes https://github.com/official-stockfish/Stockfish/pull/5375 Bench: 1174094 --- AUTHORS | 1 + src/search.cpp | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/AUTHORS b/AUTHORS index a232e115..6eefb56d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -72,6 +72,7 @@ Ehsan Rashid (erashid) Elvin Liu (solarlight2) erbsenzaehler Ernesto Gatti +evqsx Fabian Beuke (madnight) Fabian Fichter (ianfab) Fanael Linithien (Fanael) diff --git a/src/search.cpp b/src/search.cpp index 06adc92a..8ae12e68 100644 --- a/src/search.cpp +++ b/src/search.cpp 
@@ -805,16 +805,7 @@ Value Search::Worker::search( if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY) { if (thisThread->nmpMinPly || depth < 16) - { - if (nullValue >= ss->staticEval) - { - auto bonus = std::min(int(nullValue - ss->staticEval) * depth / 32, - CORRECTION_HISTORY_LIMIT / 16); - thisThread->correctionHistory[us][pawn_structure_index(pos)] - << bonus; - } return nullValue; - } assert(!thisThread->nmpMinPly); // Recursive verification is not allowed From e271059e08c6258420af12897367ea2149220171 Mon Sep 17 00:00:00 2001 From: cj5716 <125858804+cj5716@users.noreply.github.com> Date: Fri, 7 Jun 2024 18:30:33 +0800 Subject: [PATCH 121/315] Make repeated bench runs identical fixes https://github.com/official-stockfish/Stockfish/issues/5376 closes https://github.com/official-stockfish/Stockfish/pull/5377 No functional changes --- src/tt.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tt.cpp b/src/tt.cpp index 56779b86..5a44759e 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -94,6 +94,7 @@ void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) { // Initializes the entire transposition table to zero, // in a multi-threaded way. void TranspositionTable::clear(ThreadPool& threads) { + generation8 = 0; const size_t threadCount = threads.num_threads(); for (size_t i = 0; i < threadCount; ++i) From 7e890fd048e22bfd213d46ec8eb88f7931f0315d Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Fri, 7 Jun 2024 23:53:33 +0200 Subject: [PATCH 122/315] Keep mate PVs intact. do not return a cutoff value in razoring if that value is in the mate/tb range. 
passed STC: https://tests.stockfishchess.org/tests/view/666381880ff7cb4868d1fe58 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 130848 W: 34046 L: 33931 D: 62871 Ptnml(0-2): 429, 14968, 34524, 15065, 438 passed LTC: https://tests.stockfishchess.org/tests/view/66643f120612cd151f9e7788 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 39702 W: 10157 L: 9959 D: 19586 Ptnml(0-2): 20, 4108, 11402, 4296, 25 closes https://github.com/official-stockfish/Stockfish/pull/5379 Bench: 1174094 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 8ae12e68..3dbdfd47 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -768,7 +768,7 @@ Value Search::Worker::search( if (eval < alpha - 512 - 293 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); - if (value < alpha) + if (value < alpha && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) return value; } From c8213ba0d047569141ed58f5eb86579d976b5614 Mon Sep 17 00:00:00 2001 From: Dubslow Date: Mon, 10 Jun 2024 18:03:36 -0500 Subject: [PATCH 123/315] Simplify TT interface and avoid changing TT info This commit builds on the work and ideas of #5345, #5348, and #5364. Place as much as possible of the TT implementation in tt.cpp, rather than in the header. Some commentary is added to better document the public interface. Fix the search read-TT races, or at least contain them to within TT methods only. 
Passed SMP STC: https://tests.stockfishchess.org/tests/view/666134ab91e372763104b443 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 512552 W: 132387 L: 132676 D: 247489 Ptnml(0-2): 469, 58429, 138771, 58136, 471 The unmerged version has bench identical to the other PR (see also #5348) and therefore those same-functionality tests: SMP LTC: https://tests.stockfishchess.org/tests/view/665c7021fd45fb0f907c214a SMP LTC: https://tests.stockfishchess.org/tests/view/665d28a7fd45fb0f907c5495 closes https://github.com/official-stockfish/Stockfish/pull/5369 bench 1205675 --- src/search.cpp | 199 +++++++++++++++++++++--------------------- src/tt.cpp | 148 +++++++++++++++++++++++++------ src/tt.h | 119 ++++++++++--------------- tests/instrumented.sh | 7 +- 4 files changed, 265 insertions(+), 208 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 3dbdfd47..9c3f915d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -546,16 +546,15 @@ Value Search::Worker::search( StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); - TTEntry* tte; - Key posKey; - Move ttMove, move, excludedMove, bestMove; - Depth extension, newDepth; - Value bestValue, value, ttValue, eval, maxValue, probCutBeta, singularValue; - bool givesCheck, improving, priorCapture, opponentWorsening; - bool capture, moveCountPruning, ttCapture; - Piece movedPiece; - int moveCount, captureCount, quietCount; - Bound singularBound; + Key posKey; + Move move, excludedMove, bestMove; + Depth extension, newDepth; + Value bestValue, value, eval, maxValue, probCutBeta, singularValue; + bool givesCheck, improving, priorCapture, opponentWorsening; + bool capture, moveCountPruning, ttCapture; + Piece movedPiece; + int moveCount, captureCount, quietCount; + Bound singularBound; // Step 1. Initialize node Worker* thisThread = this; @@ -605,31 +604,32 @@ Value Search::Worker::search( ss->statScore = 0; // Step 4. Transposition table lookup. 
- excludedMove = ss->excludedMove; - posKey = pos.key(); - tte = tt.probe(posKey, ss->ttHit); - ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] - : ss->ttHit ? tte->move() - : Move::none(); - ttCapture = ttMove && pos.capture_stage(ttMove); + excludedMove = ss->excludedMove; + posKey = pos.key(); + auto [ttHit, ttData, ttWriter] = tt.probe(posKey); + // Need further processing of the saved data + ss->ttHit = ttHit; + ttData.move = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] + : ttHit ? ttData.move + : Move::none(); + ttData.value = ttHit ? value_from_tt(ttData.value, ss->ply, pos.rule50_count()) : VALUE_NONE; + ss->ttPv = excludedMove ? ss->ttPv : PvNode || (ttHit && ttData.is_pv); + ttCapture = ttData.move && pos.capture_stage(ttData.move); // At this point, if excluded, skip straight to step 6, static eval. However, // to save indentation, we list the condition in all code between here and there. - if (!excludedMove) - ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); // At non-PV nodes we check for an early TT cutoff - if (!PvNode && !excludedMove && tte->depth() > depth - (ttValue <= beta) - && ttValue != VALUE_NONE // Possible in case of TT access race or if !ttHit - && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) + if (!PvNode && !excludedMove && ttData.depth > depth - (ttData.value <= beta) + && ttData.value != VALUE_NONE // Can happen when !ttHit or when access race in probe() + && (ttData.bound & (ttData.value >= beta ? 
BOUND_LOWER : BOUND_UPPER))) { // If ttMove is quiet, update move sorting heuristics on TT hit (~2 Elo) - if (ttMove && ttValue >= beta) + if (ttData.move && ttData.value >= beta) { // Bonus for a quiet ttMove that fails high (~2 Elo) if (!ttCapture) - update_quiet_stats(pos, ss, *this, ttMove, stat_bonus(depth)); + update_quiet_stats(pos, ss, *this, ttData.move, stat_bonus(depth)); // Extra penalty for early quiet moves of // the previous ply (~1 Elo on STC, ~2 Elo on LTC) @@ -641,7 +641,7 @@ Value Search::Worker::search( // Partial workaround for the graph history interaction problem // For high rule50 counts don't produce transposition table cutoffs. if (pos.rule50_count() < 90) - return ttValue; + return ttData.value; } // Step 5. Tablebases probe @@ -679,9 +679,9 @@ Value Search::Worker::search( if (b == BOUND_EXACT || (b == BOUND_LOWER ? value >= beta : value <= alpha)) { - tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, - std::min(MAX_PLY - 1, depth + 6), Move::none(), VALUE_NONE, - tt.generation()); + ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, + std::min(MAX_PLY - 1, depth + 6), Move::none(), VALUE_NONE, + tt.generation()); return value; } @@ -716,7 +716,7 @@ Value Search::Worker::search( else if (ss->ttHit) { // Never assume anything about values stored in TT - unadjustedStaticEval = tte->eval(); + unadjustedStaticEval = ttData.eval; if (unadjustedStaticEval == VALUE_NONE) unadjustedStaticEval = evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]); @@ -726,8 +726,9 @@ Value Search::Worker::search( ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); // ttValue can be used as a better position evaluation (~7 Elo) - if (ttValue != VALUE_NONE && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER))) - eval = ttValue; + if (ttData.value != VALUE_NONE + && (ttData.bound & (ttData.value > eval ? 
BOUND_LOWER : BOUND_UPPER))) + eval = ttData.value; } else { @@ -736,8 +737,8 @@ Value Search::Worker::search( ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); // Static evaluation is saved as it was before adjustment by correction history - tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_UNSEARCHED, Move::none(), - unadjustedStaticEval, tt.generation()); + ttWriter.write(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_UNSEARCHED, Move::none(), + unadjustedStaticEval, tt.generation()); } // Use static evaluation difference to improve quiet move ordering (~9 Elo) @@ -778,7 +779,7 @@ Value Search::Worker::search( && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - (ss - 1)->statScore / 263 >= beta - && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) + && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttData.move || ttCapture)) return beta > VALUE_TB_LOSS_IN_MAX_PLY ? beta + (eval - beta) / 3 : eval; // Step 9. Null move search with verification search (~35 Elo) @@ -824,7 +825,7 @@ Value Search::Worker::search( // Step 10. Internal iterative reductions (~9 Elo) // For PV nodes without a ttMove, we decrease depth by 3. - if (PvNode && !ttMove) + if (PvNode && !ttData.move) depth -= 3; // Use qsearch if depth <= 0. @@ -833,8 +834,8 @@ Value Search::Worker::search( // For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, or // by 1 if there is a ttMove with an upper bound. - if (cutNode && depth >= 8 && (!ttMove || tte->bound() == BOUND_UPPER)) - depth -= 1 + !ttMove; + if (cutNode && depth >= 8 && (!ttData.move || ttData.bound == BOUND_UPPER)) + depth -= 1 + !ttData.move; // Step 11. 
ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value @@ -847,11 +848,11 @@ Value Search::Worker::search( // there and in further interactions with transposition table cutoff depth is set to depth - 3 // because probCut search has depth set to depth - 4 but we also do a move before it // So effective depth is equal to depth - 3 - && !(tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) + && !(ttData.depth >= depth - 3 && ttData.value != VALUE_NONE && ttData.value < probCutBeta)) { assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta); - MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &thisThread->captureHistory); + MovePicker mp(pos, ttData.move, probCutBeta - ss->staticEval, &thisThread->captureHistory); while ((move = mp.next_move()) != Move::none()) if (move != excludedMove && pos.legal(move)) @@ -882,8 +883,8 @@ Value Search::Worker::search( if (value >= probCutBeta) { // Save ProbCut data into transposition table - tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, depth - 3, - move, unadjustedStaticEval, tt.generation()); + ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, + depth - 3, move, unadjustedStaticEval, tt.generation()); return std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY ? value - (probCutBeta - beta) : value; } @@ -896,9 +897,10 @@ moves_loop: // When in check, search starts here // Step 12. 
A small Probcut idea, when we are in check (~4 Elo) probCutBeta = beta + 388; - if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 4 && ttValue >= probCutBeta - && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) + if (ss->inCheck && !PvNode && ttCapture && (ttData.bound & BOUND_LOWER) + && ttData.depth >= depth - 4 && ttData.value >= probCutBeta + && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY + && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) return probCutBeta; const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -911,7 +913,7 @@ moves_loop: // When in check, search starts here Move countermove = prevSq != SQ_NONE ? thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] : Move::none(); - MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, + MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, &thisThread->pawnHistory, countermove, ss->killers); value = bestValue; @@ -1046,12 +1048,12 @@ moves_loop: // When in check, search starts here // Generally, higher singularBeta (i.e closer to ttValue) and lower extension // margins scale well. 
- if (!rootNode && move == ttMove && !excludedMove + if (!rootNode && move == ttData.move && !excludedMove && depth >= 4 - (thisThread->completedDepth > 35) + ss->ttPv - && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 3) + && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY && (ttData.bound & BOUND_LOWER) + && ttData.depth >= depth - 3) { - Value singularBeta = ttValue - (52 + 80 * (ss->ttPv && !PvNode)) * depth / 64; + Value singularBeta = ttData.value - (52 + 80 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1086,7 +1088,7 @@ moves_loop: // When in check, search starts here // so we reduce the ttMove in favor of other moves based on some conditions: // If the ttMove is assumed to fail high over current beta (~7 Elo) - else if (ttValue >= beta) + else if (ttData.value >= beta) extension = -3; // If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo) @@ -1126,7 +1128,7 @@ moves_loop: // When in check, search starts here // Decrease reduction if position is or has been on the PV (~7 Elo) if (ss->ttPv) - r -= 1 + (ttValue > alpha) + (tte->depth() >= depth); + r -= 1 + (ttData.value > alpha) + (ttData.depth >= depth); // Decrease reduction for PvNodes (~0 Elo on STC, ~2 Elo on LTC) if (PvNode) @@ -1136,8 +1138,8 @@ moves_loop: // When in check, search starts here // Increase reduction for cut nodes (~4 Elo) if (cutNode) - r += 2 - (tte->depth() >= depth && ss->ttPv) - + (!ss->ttPv && move != ttMove && move != ss->killers[0]); + r += 2 - (ttData.depth >= depth && ss->ttPv) + + (!ss->ttPv && move != ttData.move && move != ss->killers[0]); // Increase reduction if ttMove is a capture (~3 Elo) if (ttCapture) @@ -1149,7 +1151,7 @@ moves_loop: // When in check, search starts here // For first picked move (ttMove) reduce reduction // but never allow it to go below 0 (~3 Elo) - else if (move == ttMove) + else if (move == 
ttData.move) r = std::max(0, r - 2); ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] @@ -1197,7 +1199,7 @@ moves_loop: // When in check, search starts here else if (!PvNode || moveCount > 1) { // Increase reduction if ttMove is not present (~6 Elo) - if (!ttMove) + if (!ttData.move) r += 2; // Note that if expected reduction is high, we reduce search depth by 1 here (~9 Elo) @@ -1287,7 +1289,7 @@ moves_loop: // When in check, search starts here if (value >= beta) { - ss->cutoffCnt += 1 + !ttMove - (extension >= 2); + ss->cutoffCnt += 1 + !ttData.move - (extension >= 2); assert(value >= beta); // Fail high break; } @@ -1363,11 +1365,11 @@ moves_loop: // When in check, search starts here // Write gathered information in transposition table // Static evaluation is saved as it was before correction history if (!excludedMove && !(rootNode && thisThread->pvIdx)) - tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, - bestValue >= beta ? BOUND_LOWER - : PvNode && bestMove ? BOUND_EXACT - : BOUND_UPPER, - depth, bestMove, unadjustedStaticEval, tt.generation()); + ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, + bestValue >= beta ? BOUND_LOWER + : PvNode && bestMove ? BOUND_EXACT + : BOUND_UPPER, + depth, bestMove, unadjustedStaticEval, tt.generation()); // Adjust correction history if (!ss->inCheck && (!bestMove || !pos.capture(bestMove)) @@ -1414,14 +1416,12 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); - TTEntry* tte; - Key posKey; - Move ttMove, move, bestMove; - Depth ttDepth; - Value bestValue, value, ttValue, futilityBase; - bool pvHit, givesCheck, capture; - int moveCount; - Color us = pos.side_to_move(); + Key posKey; + Move move, bestMove; + Value bestValue, value, futilityBase; + bool pvHit, givesCheck, capture; + int moveCount; + Color us = pos.side_to_move(); // Step 1. 
Initialize node if (PvNode) @@ -1447,23 +1447,25 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, assert(0 <= ss->ply && ss->ply < MAX_PLY); - // Note that unlike regular search, which stores literal depth, in QS we only store the - // current movegen stage. If in check, we search all evasions and thus store - // DEPTH_QS_CHECKS. (Evasions may be quiet, and _CHECKS includes quiets.) - ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NORMAL; + // Note that unlike regular search, which stores the literal depth into the TT, from QS we + // only store the current movegen stage as "depth". If in check, we search all evasions and + // thus store DEPTH_QS_CHECKS. (Evasions may be quiet, and _CHECKS includes quiets.) + Depth qsTtDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NORMAL; // Step 3. Transposition table lookup - posKey = pos.key(); - tte = tt.probe(posKey, ss->ttHit); - ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ss->ttHit ? tte->move() : Move::none(); - pvHit = ss->ttHit && tte->is_pv(); + posKey = pos.key(); + auto [ttHit, ttData, ttWriter] = tt.probe(posKey); + // Need further processing of the saved data + ss->ttHit = ttHit; + ttData.move = ttHit ? ttData.move : Move::none(); + ttData.value = ttHit ? value_from_tt(ttData.value, ss->ply, pos.rule50_count()) : VALUE_NONE; + pvHit = ttHit && ttData.is_pv; // At non-PV nodes we check for an early TT cutoff - if (!PvNode && tte->depth() >= ttDepth - && ttValue != VALUE_NONE // Only in case of TT access race or if !ttHit - && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) - return ttValue; + if (!PvNode && ttData.depth >= qsTtDepth + && ttData.value != VALUE_NONE // Can happen when !ttHit or when access race in probe() + && (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER))) + return ttData.value; // Step 4. 
Static evaluation of the position Value unadjustedStaticEval = VALUE_NONE; @@ -1474,7 +1476,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, if (ss->ttHit) { // Never assume anything about values stored in TT - unadjustedStaticEval = tte->eval(); + unadjustedStaticEval = ttData.eval; if (unadjustedStaticEval == VALUE_NONE) unadjustedStaticEval = evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]); @@ -1482,9 +1484,9 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); // ttValue can be used as a better position evaluation (~13 Elo) - if (std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY - && (tte->bound() & (ttValue > bestValue ? BOUND_LOWER : BOUND_UPPER))) - bestValue = ttValue; + if (std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY + && (ttData.bound & (ttData.value > bestValue ? BOUND_LOWER : BOUND_UPPER))) + bestValue = ttData.value; } else { @@ -1503,9 +1505,9 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && !PvNode) bestValue = (3 * bestValue + beta) / 4; if (!ss->ttHit) - tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, - DEPTH_UNSEARCHED, Move::none(), unadjustedStaticEval, tt.generation()); - + ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, + DEPTH_UNSEARCHED, Move::none(), unadjustedStaticEval, + tt.generation()); return bestValue; } @@ -1524,7 +1526,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, // (Presently, having the checks stage is worth only 1 Elo, and may be removable in the near future, // which would result in only a single stage of QS movegen.) Square prevSq = ((ss - 1)->currentMove).is_ok() ? 
((ss - 1)->currentMove).to_sq() : SQ_NONE; - MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, + MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, &thisThread->pawnHistory); // Step 5. Loop through all pseudo-legal moves until no moves remain or a beta cutoff occurs. @@ -1643,9 +1645,9 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, // Save gathered info in transposition table // Static evaluation is saved as it was before adjustment by correction history - tte->save(posKey, value_to_tt(bestValue, ss->ply), pvHit, - bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, ttDepth, bestMove, - unadjustedStaticEval, tt.generation()); + ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), pvHit, + bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, qsTtDepth, bestMove, + unadjustedStaticEval, tt.generation()); assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); @@ -1986,20 +1988,17 @@ bool RootMove::extract_ponder_from_tt(const TranspositionTable& tt, Position& po StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); - bool ttHit; - assert(pv.size() == 1); if (pv[0] == Move::none()) return false; pos.do_move(pv[0], st); - TTEntry* tte = tt.probe(pos.key(), ttHit); + auto [ttHit, ttData, ttWriter] = tt.probe(pos.key()); if (ttHit) { - Move m = tte->move(); // Local copy to be SMP safe - if (MoveList(pos).contains(m)) - pv.push_back(m); + if (MoveList(pos).contains(ttData.move)) + pv.push_back(ttData.move); } pos.undo_move(pv[0]); diff --git a/src/tt.cpp b/src/tt.cpp index 5a44759e..763e2c9b 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -25,11 +25,63 @@ #include #include "memory.h" +#include "misc.h" #include "syzygy/tbprobe.h" #include "thread.h" namespace Stockfish { + +// TTEntry struct is the 10 bytes transposition table entry, defined as below: +// +// key 16 bit +// depth 8 bit +// generation 5 bit +// pv node 1 bit +// bound 
type 2 bit +// move 16 bit +// value 16 bit +// evaluation 16 bit +// +// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially. +// Equally, the store order in save() matches this order. + +struct TTEntry { + + // Convert internal bitfields to external types + TTData read() const { + return TTData{Move(move16), Value(value16), + Value(eval16), Depth(depth8 + DEPTH_ENTRY_OFFSET), + Bound(genBound8 & 0x3), bool(genBound8 & 0x4)}; + } + + void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8); + // The returned age is a multiple of TranspositionTable::GENERATION_DELTA + uint8_t relative_age(const uint8_t generation8) const; + + private: + friend class TranspositionTable; + + uint16_t key16; + uint8_t depth8; + uint8_t genBound8; + Move move16; + int16_t value16; + int16_t eval16; +}; + +// `genBound8` is where most of the details are. We use the following constants to manipulate 5 leading generation bits +// and 3 trailing miscellaneous bits. + +// These bits are reserved for other things. +static constexpr unsigned GENERATION_BITS = 3; +// increment for generation field +static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS); +// cycle length +static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA; +// mask to pull out generation number +static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; + // DEPTH_ENTRY_OFFSET exists because 1) we use `bool(depth8)` as the occupancy check, but // 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits": // we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted below.) @@ -65,12 +117,34 @@ uint8_t TTEntry::relative_age(const uint8_t generation8) const { // is needed to keep the unrelated lowest n bits from affecting // the result) to calculate the entry age correctly even after // generation8 overflows into the next cycle. 
- - return (TranspositionTable::GENERATION_CYCLE + generation8 - genBound8) - & TranspositionTable::GENERATION_MASK; + return (GENERATION_CYCLE + generation8 - genBound8) & GENERATION_MASK; } +// TTWriter is but a very thin wrapper around the pointer +TTWriter::TTWriter(TTEntry* tte) : + entry(tte) {} + +void TTWriter::write( + Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) { + entry->save(k, v, pv, b, d, m, ev, generation8); +} + + +// A TranspositionTable is an array of Cluster, of size clusterCount. Each cluster consists of ClusterSize number +// of TTEntry. Each non-empty TTEntry contains information on exactly one position. The size of a Cluster should +// divide the size of a cache line for best performance, as the cacheline is prefetched when possible. + +static constexpr int ClusterSize = 3; + +struct Cluster { + TTEntry entry[ClusterSize]; + char padding[2]; // Pad to 32 bytes +}; + +static_assert(sizeof(Cluster) == 32, "Suboptimal Cluster size"); + + // Sets the size of the transposition table, // measured in megabytes. Transposition table consists // of clusters and each cluster consists of ClusterSize number of TTEntry. @@ -114,32 +188,6 @@ void TranspositionTable::clear(ThreadPool& threads) { } -// Looks up the current position in the transposition -// table. It returns true and a pointer to the TTEntry if the position is found. -// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry -// to be replaced later. The replace value of an entry is calculated as its depth -// minus 8 times its relative age. TTEntry t1 is considered more valuable than -// TTEntry t2 if its replace value is greater than that of t2. 
-TTEntry* TranspositionTable::probe(const Key key, bool& found) const { - - TTEntry* const tte = first_entry(key); - const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster - - for (int i = 0; i < ClusterSize; ++i) - if (tte[i].key16 == key16) - return found = bool(tte[i].depth8), &tte[i]; - - // Find an entry to be replaced according to the replacement strategy - TTEntry* replace = tte; - for (int i = 1; i < ClusterSize; ++i) - if (replace->depth8 - replace->relative_age(generation8) * 2 - > tte[i].depth8 - tte[i].relative_age(generation8) * 2) - replace = &tte[i]; - - return found = false, replace; -} - - // Returns an approximation of the hashtable // occupation during a search. The hash is x permill full, as per UCI protocol. // Only counts entries which match the current generation. @@ -154,4 +202,46 @@ int TranspositionTable::hashfull() const { return cnt / ClusterSize; } + +void TranspositionTable::new_search() { + // increment by delta to keep lower bits as is + generation8 += GENERATION_DELTA; +} + + +uint8_t TranspositionTable::generation() const { return generation8; } + + +// Looks up the current position in the transposition +// table. It returns true if the position is found. +// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry +// to be replaced later. The replace value of an entry is calculated as its depth +// minus 8 times its relative age. TTEntry t1 is considered more valuable than +// TTEntry t2 if its replace value is greater than that of t2. +std::tuple TranspositionTable::probe(const Key key) const { + + TTEntry* const tte = first_entry(key); + const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster + + for (int i = 0; i < ClusterSize; ++i) + if (tte[i].key16 == key16) + // This gap is the main place for read races. + // After `read()` completes that copy is final, but may be self-inconsistent. 
+ return {bool(tte[i].depth8), tte[i].read(), TTWriter(&tte[i])}; + + // Find an entry to be replaced according to the replacement strategy + TTEntry* replace = tte; + for (int i = 1; i < ClusterSize; ++i) + if (replace->depth8 - replace->relative_age(generation8) * 2 + > tte[i].depth8 - tte[i].relative_age(generation8) * 2) + replace = &tte[i]; + + return {false, replace->read(), TTWriter(replace)}; +} + + +TTEntry* TranspositionTable::first_entry(const Key key) const { + return &table[mul_hi64(key, clusterCount)].entry[0]; +} + } // namespace Stockfish diff --git a/src/tt.h b/src/tt.h index b2e8f582..1bece002 100644 --- a/src/tt.h +++ b/src/tt.h @@ -21,103 +21,76 @@ #include #include +#include #include "memory.h" -#include "misc.h" #include "types.h" namespace Stockfish { -// TTEntry struct is the 10 bytes transposition table entry, defined as below: -// -// key 16 bit -// depth 8 bit -// generation 5 bit -// pv node 1 bit -// bound type 2 bit -// move 16 bit -// value 16 bit -// eval value 16 bit -// -// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially. -// Equally, the store order in save() matches this order. -struct TTEntry { +class ThreadPool; +struct TTEntry; +struct Cluster; - Move move() const { return Move(move16); } - Value value() const { return Value(value16); } - Value eval() const { return Value(eval16); } - Depth depth() const { return Depth(depth8 + DEPTH_ENTRY_OFFSET); } - bool is_pv() const { return bool(genBound8 & 0x4); } - Bound bound() const { return Bound(genBound8 & 0x3); } - void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8); - // The returned age is a multiple of TranspositionTable::GENERATION_DELTA - uint8_t relative_age(const uint8_t generation8) const; +// There is only one global hash table for the engine and all its threads. 
For chess in particular, we even allow racy +// updates between threads to and from the TT, as taking the time to synchronize access would cost thinking time and +// thus elo. As a hash table, collisions are possible and may cause chess playing issues (bizarre blunders, faulty mate +// reports, etc). Fixing these also loses elo; however such risk decreases quickly with larger TT size. +// +// `probe` is the primary method: given a board position, we lookup its entry in the table, and return a tuple of: +// 1) whether the entry already has this position +// 2) a copy of the prior data (if any) (may be inconsistent due to read races) +// 3) a writer object to this entry +// The copied data and the writer are separated to maintain clear boundaries between local vs global objects. + + +// A copy of the data already in the entry (possibly collided). `probe` may be racy, resulting in inconsistent data. +struct TTData { + Move move; + Value value, eval; + Depth depth; + Bound bound; + bool is_pv; +}; + + +// This is used to make racy writes to the global TT. +struct TTWriter { + public: + void write(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8); private: friend class TranspositionTable; - - uint16_t key16; - uint8_t depth8; - uint8_t genBound8; - Move move16; - int16_t value16; - int16_t eval16; + TTEntry* entry; + TTWriter(TTEntry* tte); }; -class ThreadPool; -// A TranspositionTable is an array of Cluster, of size clusterCount. Each -// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry -// contains information on exactly one position. The size of a Cluster should -// divide the size of a cache line for best performance, as the cacheline is -// prefetched when possible. 
class TranspositionTable { - static constexpr int ClusterSize = 3; - - struct Cluster { - TTEntry entry[ClusterSize]; - char padding[2]; // Pad to 32 bytes - }; - - static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size"); - - // Constants used to refresh the hash table periodically - - // We have 8 bits available where the lowest 3 bits are - // reserved for other things. - static constexpr unsigned GENERATION_BITS = 3; - // increment for generation field - static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS); - // cycle length - static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA; - // mask to pull out generation number - static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; - public: ~TranspositionTable() { aligned_large_pages_free(table); } - void new_search() { - // increment by delta to keep lower bits as is - generation8 += GENERATION_DELTA; - } - TTEntry* probe(const Key key, bool& found) const; - int hashfull() const; - void resize(size_t mbSize, ThreadPool& threads); - void clear(ThreadPool& threads); + void resize(size_t mbSize, ThreadPool& threads); // Set TT size + void clear(ThreadPool& threads); // Re-initialize memory, multithreaded + int hashfull() + const; // Approximate what fraction of entries (permille) have been written to during this root search - TTEntry* first_entry(const Key key) const { - return &table[mul_hi64(key, clusterCount)].entry[0]; - } - - uint8_t generation() const { return generation8; } + void + new_search(); // This must be called at the beginning of each root search to track entry aging + uint8_t generation() const; // The current age, used when writing new data to the TT + std::tuple + probe(const Key key) const; // The main method, whose retvals separate local vs global objects + TTEntry* first_entry(const Key key) + const; // This is the hash function; its only external use is memory prefetching. 
private: friend struct TTEntry; size_t clusterCount; - Cluster* table = nullptr; - uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8 + Cluster* table = nullptr; + + uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8 }; } // namespace Stockfish diff --git a/tests/instrumented.sh b/tests/instrumented.sh index 4c63fc57..e77ee0dd 100755 --- a/tests/instrumented.sh +++ b/tests/instrumented.sh @@ -39,13 +39,8 @@ case $1 in threads="2" cat << EOF > tsan.supp -race:Stockfish::TTEntry::move -race:Stockfish::TTEntry::depth -race:Stockfish::TTEntry::bound +race:Stockfish::TTEntry::read race:Stockfish::TTEntry::save -race:Stockfish::TTEntry::value -race:Stockfish::TTEntry::eval -race:Stockfish::TTEntry::is_pv race:Stockfish::TranspositionTable::probe race:Stockfish::TranspositionTable::hashfull From 7013a22b741b9fa937e0e027c4992c52b999283c Mon Sep 17 00:00:00 2001 From: Disservin Date: Tue, 4 Jun 2024 22:29:27 +0200 Subject: [PATCH 124/315] Move options into the engine Move the engine options into the engine class, also avoid duplicated initializations after startup. UCIEngine needs to register an add_listener to listen to all option changes and print these. Also avoid a double initialization of the TT, which was the case with the old state. 
closes https://github.com/official-stockfish/Stockfish/pull/5356 No functional change --- src/engine.cpp | 84 +++++++++++++++++++++++++++++++++++++++++++++-- src/engine.h | 11 +++++-- src/tune.cpp | 16 ++++----- src/tune.h | 2 ++ src/uci.cpp | 81 ++++++++------------------------------------- src/uci.h | 5 +-- src/ucioption.cpp | 28 ++++++++++++---- src/ucioption.h | 72 ++++++++++++++++++++++++++-------------- 8 files changed, 183 insertions(+), 116 deletions(-) diff --git a/src/engine.cpp b/src/engine.cpp index 6980dd83..233f6270 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -44,7 +44,8 @@ namespace Stockfish { namespace NN = Eval::NNUE; -constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; +constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; +constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; Engine::Engine(std::string path) : binaryDirectory(CommandLine::get_binary_directory(path)), @@ -58,6 +59,58 @@ Engine::Engine(std::string path) : NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) { pos.set(StartFEN, false, &states->back()); capSq = SQ_NONE; + + options["Debug Log File"] << Option("", [](const Option& o) { + start_logger(o); + return std::nullopt; + }); + + options["NumaPolicy"] << Option("auto", [this](const Option& o) { + set_numa_config_from_option(o); + return numa_config_information_as_string() + "\n" + thread_binding_information_as_string(); + }); + + options["Threads"] << Option(1, 1, 1024, [this](const Option&) { + resize_threads(); + return thread_binding_information_as_string(); + }); + + options["Hash"] << Option(16, 1, MaxHashMB, [this](const Option& o) { + set_tt_size(o); + return std::nullopt; + }); + + options["Clear Hash"] << Option([this](const Option&) { + search_clear(); + return std::nullopt; + }); + options["Ponder"] << Option(false); + options["MultiPV"] << Option(1, 1, MAX_MOVES); + options["Skill Level"] << Option(20, 0, 
20); + options["Move Overhead"] << Option(10, 0, 5000); + options["nodestime"] << Option(0, 0, 10000); + options["UCI_Chess960"] << Option(false); + options["UCI_LimitStrength"] << Option(false); + options["UCI_Elo"] << Option(1320, 1320, 3190); + options["UCI_ShowWDL"] << Option(false); + options["SyzygyPath"] << Option("", [](const Option& o) { + Tablebases::init(o); + return std::nullopt; + }); + options["SyzygyProbeDepth"] << Option(1, 1, 100); + options["Syzygy50MoveRule"] << Option(true); + options["SyzygyProbeLimit"] << Option(7, 0, 7); + options["EvalFile"] << Option(EvalFileDefaultNameBig, [this](const Option& o) { + load_big_network(o); + return std::nullopt; + }); + options["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, [this](const Option& o) { + load_small_network(o); + return std::nullopt; + }); + + load_networks(); + resize_threads(); } std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) { @@ -212,7 +265,8 @@ void Engine::trace_eval() const { sync_cout << "\n" << Eval::trace(p, *networks) << sync_endl; } -OptionsMap& Engine::get_options() { return options; } +const OptionsMap& Engine::get_options() const { return options; } +OptionsMap& Engine::get_options() { return options; } std::string Engine::fen() const { return pos.fen(); } @@ -241,4 +295,30 @@ std::string Engine::get_numa_config_as_string() const { return numaContext.get_numa_config().to_string(); } +std::string Engine::numa_config_information_as_string() const { + auto cfgStr = get_numa_config_as_string(); + return "Available Processors: " + cfgStr; +} + +std::string Engine::thread_binding_information_as_string() const { + auto boundThreadsByNode = get_bound_thread_count_by_numa_node(); + if (boundThreadsByNode.empty()) + return ""; + + std::stringstream ss; + ss << "NUMA Node Thread Binding: "; + + bool isFirst = true; + + for (auto&& [current, total] : boundThreadsByNode) + { + if (!isFirst) + ss << ":"; + ss << current << "/" << total; + isFirst = 
false; + } + + return ss.str(); +} + } diff --git a/src/engine.h b/src/engine.h index 91a8a96b..0d6f0f2b 100644 --- a/src/engine.h +++ b/src/engine.h @@ -29,13 +29,13 @@ #include #include "nnue/network.h" +#include "numa.h" #include "position.h" #include "search.h" #include "syzygy/tbprobe.h" // for Stockfish::Depth #include "thread.h" #include "tt.h" #include "ucioption.h" -#include "numa.h" namespace Stockfish { @@ -92,13 +92,18 @@ class Engine { // utility functions - void trace_eval() const; - OptionsMap& get_options(); + void trace_eval() const; + + const OptionsMap& get_options() const; + OptionsMap& get_options(); + std::string fen() const; void flip(); std::string visualize() const; std::vector> get_bound_thread_count_by_numa_node() const; std::string get_numa_config_as_string() const; + std::string numa_config_information_as_string() const; + std::string thread_binding_information_as_string() const; private: const std::string binaryDirectory; diff --git a/src/tune.cpp b/src/tune.cpp index 94c9b53e..dfcd3468 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -33,19 +34,19 @@ namespace Stockfish { bool Tune::update_on_last; const Option* LastOption = nullptr; OptionsMap* Tune::options; - - namespace { std::map TuneResults; -void on_tune(const Option& o) { +std::optional on_tune(const Option& o) { if (!Tune::update_on_last || LastOption == &o) Tune::read_options(); + + return std::nullopt; +} } - -void make_option(OptionsMap* options, const string& n, int v, const SetRange& r) { +void Tune::make_option(OptionsMap* opts, const string& n, int v, const SetRange& r) { // Do not generate option when there is nothing to tune (ie. 
min = max) if (r(v).first == r(v).second) @@ -54,8 +55,8 @@ void make_option(OptionsMap* options, const string& n, int v, const SetRange& r) if (TuneResults.count(n)) v = TuneResults[n]; - (*options)[n] << Option(v, r(v).first, r(v).second, on_tune); - LastOption = &((*options)[n]); + (*opts)[n] << Option(v, r(v).first, r(v).second, on_tune); + LastOption = &((*opts)[n]); // Print formatted parameters, ready to be copy-pasted in Fishtest std::cout << n << "," // @@ -65,7 +66,6 @@ void make_option(OptionsMap* options, const string& n, int v, const SetRange& r) << (r(v).second - r(v).first) / 20.0 << "," // << "0.0020" << std::endl; } -} string Tune::next(string& names, bool pop) { diff --git a/src/tune.h b/src/tune.h index 079614db..ed4738cd 100644 --- a/src/tune.h +++ b/src/tune.h @@ -145,6 +145,8 @@ class Tune { return add(value, (next(names), std::move(names)), args...); } + static void make_option(OptionsMap* options, const std::string& n, int v, const SetRange& r); + std::vector> list; public: diff --git a/src/uci.cpp b/src/uci.cpp index 42c69cde..75b7dfc7 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -30,20 +30,16 @@ #include "benchmark.h" #include "engine.h" -#include "evaluate.h" #include "movegen.h" #include "position.h" #include "score.h" #include "search.h" -#include "syzygy/tbprobe.h" #include "types.h" #include "ucioption.h" namespace Stockfish { -constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; -constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; - +constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; template struct overload: Ts... 
{ using Ts::operator()...; @@ -56,55 +52,25 @@ UCIEngine::UCIEngine(int argc, char** argv) : engine(argv[0]), cli(argc, argv) { - auto& options = engine.get_options(); + engine.get_options().add_info_listener([](const std::optional& str) { + if (!str || (*str).empty()) + return; - options["Debug Log File"] << Option("", [](const Option& o) { start_logger(o); }); + // split all lines + auto ss = std::istringstream{*str}; - options["NumaPolicy"] << Option("auto", [this](const Option& o) { - engine.set_numa_config_from_option(o); - print_numa_config_information(); - print_thread_binding_information(); + for (std::string line; std::getline(ss, line, '\n');) + sync_cout << "info string " << line << sync_endl; }); - options["Threads"] << Option(1, 1, 1024, [this](const Option&) { - engine.resize_threads(); - print_thread_binding_information(); - }); - - options["Hash"] << Option(16, 1, MaxHashMB, [this](const Option& o) { engine.set_tt_size(o); }); - - options["Clear Hash"] << Option([this](const Option&) { engine.search_clear(); }); - options["Ponder"] << Option(false); - options["MultiPV"] << Option(1, 1, MAX_MOVES); - options["Skill Level"] << Option(20, 0, 20); - options["Move Overhead"] << Option(10, 0, 5000); - options["nodestime"] << Option(0, 0, 10000); - options["UCI_Chess960"] << Option(false); - options["UCI_LimitStrength"] << Option(false); - options["UCI_Elo"] << Option(1320, 1320, 3190); - options["UCI_ShowWDL"] << Option(false); - options["SyzygyPath"] << Option("", [](const Option& o) { Tablebases::init(o); }); - options["SyzygyProbeDepth"] << Option(1, 1, 100); - options["Syzygy50MoveRule"] << Option(true); - options["SyzygyProbeLimit"] << Option(7, 0, 7); - options["EvalFile"] << Option(EvalFileDefaultNameBig, - [this](const Option& o) { engine.load_big_network(o); }); - options["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, - [this](const Option& o) { engine.load_small_network(o); }); - - engine.set_on_iter([](const auto& i) { on_iter(i); }); 
engine.set_on_update_no_moves([](const auto& i) { on_update_no_moves(i); }); - engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); + engine.set_on_update_full( + [this](const auto& i) { on_update_full(i, engine.get_options()["UCI_ShowWDL"]); }); engine.set_on_bestmove([](const auto& bm, const auto& p) { on_bestmove(bm, p); }); - - engine.load_networks(); - engine.resize_threads(); - engine.search_clear(); // After threads are up } void UCIEngine::loop() { - std::string token, cmd; for (int i = 1; i < cli.argc; ++i) @@ -136,8 +102,9 @@ void UCIEngine::loop() { sync_cout << "id name " << engine_info(true) << "\n" << engine.get_options() << sync_endl; - print_numa_config_information(); - print_thread_binding_information(); + sync_cout << "info string " << engine.numa_config_information_as_string() << sync_endl; + sync_cout << "info string " << engine.thread_binding_information_as_string() + << sync_endl; sync_cout << "uciok" << sync_endl; } @@ -193,28 +160,6 @@ void UCIEngine::loop() { } while (token != "quit" && cli.argc == 1); // The command-line arguments are one-shot } -void UCIEngine::print_numa_config_information() const { - auto cfgStr = engine.get_numa_config_as_string(); - sync_cout << "info string Available Processors: " << cfgStr << sync_endl; -} - -void UCIEngine::print_thread_binding_information() const { - auto boundThreadsByNode = engine.get_bound_thread_count_by_numa_node(); - if (!boundThreadsByNode.empty()) - { - sync_cout << "info string NUMA Node Thread Binding: "; - bool isFirst = true; - for (auto&& [current, total] : boundThreadsByNode) - { - if (!isFirst) - std::cout << ":"; - std::cout << current << "/" << total; - isFirst = false; - } - std::cout << sync_endl; - } -} - Search::LimitsType UCIEngine::parse_limits(std::istream& is) { Search::LimitsType limits; std::string token; diff --git a/src/uci.h b/src/uci.h index bac62bb9..122bcc40 100644 --- a/src/uci.h +++ b/src/uci.h @@ -19,10 +19,10 @@ #ifndef 
UCI_H_INCLUDED #define UCI_H_INCLUDED +#include #include #include #include -#include #include "engine.h" #include "misc.h" @@ -42,9 +42,6 @@ class UCIEngine { void loop(); - void print_numa_config_information() const; - void print_thread_binding_information() const; - static int to_cp(Value v, const Position& pos); static std::string format_score(const Score& s); static std::string square(Square s); diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 4819a68d..1cd028c9 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -36,6 +36,8 @@ bool CaseInsensitiveLess::operator()(const std::string& s1, const std::string& s [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); }); } +void OptionsMap::add_info_listener(InfoListener&& message_func) { info = std::move(message_func); } + void OptionsMap::setoption(std::istringstream& is) { std::string token, name, value; @@ -57,13 +59,20 @@ void OptionsMap::setoption(std::istringstream& is) { Option OptionsMap::operator[](const std::string& name) const { auto it = options_map.find(name); - return it != options_map.end() ? it->second : Option(); + return it != options_map.end() ? it->second : Option(this); } -Option& OptionsMap::operator[](const std::string& name) { return options_map[name]; } +Option& OptionsMap::operator[](const std::string& name) { + if (!options_map.count(name)) + options_map[name] = Option(this); + return options_map[name]; +} std::size_t OptionsMap::count(const std::string& name) const { return options_map.count(name); } +Option::Option(const OptionsMap* map) : + parent(map) {} + Option::Option(const char* v, OnChange f) : type("string"), min(0), @@ -127,10 +136,12 @@ void Option::operator<<(const Option& o) { static size_t insert_order = 0; - *this = o; - idx = insert_order++; -} + auto p = this->parent; + *this = o; + this->parent = p; + idx = insert_order++; +} // Updates currentValue and triggers on_change() action. 
It's up to // the GUI to check for option's limits, but we could receive the new value @@ -159,7 +170,12 @@ Option& Option::operator=(const std::string& v) { currentValue = v; if (on_change) - on_change(*this); + { + const auto ret = on_change(*this); + + if (ret && parent != nullptr && parent->info != nullptr) + parent->info(ret); + } return *this; } diff --git a/src/ucioption.h b/src/ucioption.h index 16d46696..a47cc98d 100644 --- a/src/ucioption.h +++ b/src/ucioption.h @@ -23,6 +23,7 @@ #include #include #include +#include #include namespace Stockfish { @@ -31,31 +32,14 @@ struct CaseInsensitiveLess { bool operator()(const std::string&, const std::string&) const; }; -class Option; - -class OptionsMap { - public: - void setoption(std::istringstream&); - - friend std::ostream& operator<<(std::ostream&, const OptionsMap&); - - Option operator[](const std::string&) const; - Option& operator[](const std::string&); - - std::size_t count(const std::string&) const; - - private: - // The options container is defined as a std::map - using OptionsStore = std::map; - - OptionsStore options_map; -}; +class OptionsMap; // The Option class implements each option as specified by the UCI protocol class Option { public: - using OnChange = std::function; + using OnChange = std::function(const Option&)>; + Option(const OptionsMap*); Option(OnChange = nullptr); Option(bool v, OnChange = nullptr); Option(const char* v, OnChange = nullptr); @@ -63,7 +47,6 @@ class Option { Option(const char* v, const char* cur, OnChange = nullptr); Option& operator=(const std::string&); - void operator<<(const Option&); operator int() const; operator std::string() const; bool operator==(const char*) const; @@ -72,10 +55,49 @@ class Option { friend std::ostream& operator<<(std::ostream&, const OptionsMap&); private: - std::string defaultValue, currentValue, type; - int min, max; - size_t idx; - OnChange on_change; + friend class OptionsMap; + friend class Engine; + friend class Tune; + + void 
operator<<(const Option&); + + std::string defaultValue, currentValue, type; + int min, max; + size_t idx; + OnChange on_change; + const OptionsMap* parent = nullptr; +}; + +class OptionsMap { + public: + using InfoListener = std::function)>; + + OptionsMap() = default; + OptionsMap(const OptionsMap&) = delete; + OptionsMap(OptionsMap&&) = delete; + OptionsMap& operator=(const OptionsMap&) = delete; + OptionsMap& operator=(OptionsMap&&) = delete; + + void add_info_listener(InfoListener&&); + + void setoption(std::istringstream&); + + Option operator[](const std::string&) const; + Option& operator[](const std::string&); + + std::size_t count(const std::string&) const; + + private: + friend class Engine; + friend class Option; + + friend std::ostream& operator<<(std::ostream&, const OptionsMap&); + + // The options container is defined as a std::map + using OptionsStore = std::map; + + OptionsStore options_map; + InfoListener info; }; } From 025da6a0d1f96c1743c0ea6b182487bc2f78082c Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sat, 8 Jun 2024 14:57:09 -0700 Subject: [PATCH 125/315] Give positional output more weight in nnue eval This effectively reverts the removal of delta in: https://github.com/official-stockfish/Stockfish/pull/5373 Passed STC: https://tests.stockfishchess.org/tests/view/6664d41922234461cef58e6b LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 56448 W: 14849 L: 14500 D: 27099 Ptnml(0-2): 227, 6481, 14457, 6834, 225 Passed LTC: https://tests.stockfishchess.org/tests/view/666587a1996b40829f4ee007 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 91686 W: 23402 L: 22963 D: 45321 Ptnml(0-2): 78, 10205, 24840, 10640, 80 closes https://github.com/official-stockfish/Stockfish/pull/5382 bench 1160467 --- src/evaluate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 1317a01e..4e895fd3 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -66,14 +66,14 @@ Value Eval::evaluate(const Eval::NNUE::Networks& 
networks, auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, &caches.small) : networks.big.evaluate(pos, &caches.big); - Value nnue = psqt + positional; + Value nnue = (125 * psqt + 131 * positional) / 128; int nnueComplexity = std::abs(psqt - positional); // Re-evaluate the position when higher eval accuracy is worth the time spent if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 227)) { std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big); - nnue = psqt + positional; + nnue = (125 * psqt + 131 * positional) / 128; nnueComplexity = std::abs(psqt - positional); smallNet = false; } From 3d92950859e1d45dad60d276dd7a78fbeb097bcb Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Tue, 11 Jun 2024 21:28:11 +0200 Subject: [PATCH 126/315] Limit depth after extensions to avoid asserts. currently extensions can cause depth to exceed MAX_PLY. This triggers the assert near line 542 in search when running a binary compiled with `debug=yes` on a testcase like: ``` position fen 7K/P1p1p1p1/2P1P1Pk/6pP/3p2P1/1P6/3P4/8 w - - 0 1 go nodes 1000000 ``` passed STC https://tests.stockfishchess.org/tests/view/6668a56a602682471b064c8d LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 143936 W: 37338 L: 37238 D: 69360 Ptnml(0-2): 514, 16335, 38149, 16477, 493 closes https://github.com/official-stockfish/Stockfish/pull/5383 Bench: 1160467 --- src/search.cpp | 3 +++ tests/instrumented.sh | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index 9c3f915d..91b3c789 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -528,6 +528,9 @@ Value Search::Worker::search( if (depth <= 0) return qsearch < PvNode ? PV : NonPV > (pos, ss, alpha, beta); + // Limit the depth if extensions made it too large + depth = std::min(depth, MAX_PLY - 1); + // Check if we have an upcoming move that draws by repetition, or // if the opponent had an alternative move earlier to this position. 
if (!rootNode && alpha < VALUE_DRAW && pos.has_game_cycle(ss->ply)) diff --git a/tests/instrumented.sh b/tests/instrumented.sh index e77ee0dd..cb5a3a9f 100755 --- a/tests/instrumented.sh +++ b/tests/instrumented.sh @@ -170,6 +170,11 @@ cat << EOF > game.exp expect "score mate -1" expect "bestmove" + send "ucinewgame\n" + send "position fen 7K/P1p1p1p1/2P1P1Pk/6pP/3p2P1/1P6/3P4/8 w - - 0 1\n" + send "go nodes 500000\n" + expect "bestmove" + send "ucinewgame\n" send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" send "go depth 18\n" From 7c0607d2d36afd7b34e686e85711aca3d77c7ecf Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Wed, 12 Jun 2024 16:54:15 +0200 Subject: [PATCH 127/315] Fix printing of empty info strings. Handle printing of `info string` in a single place. Fixes #5386 closes https://github.com/official-stockfish/Stockfish/pull/5387 No functional change --- src/misc.cpp | 6 ++++++ src/misc.h | 4 ++++ src/uci.cpp | 27 ++++++++++++++++----------- src/uci.h | 2 ++ 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index a8bb46ec..e97d58b9 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -371,6 +371,8 @@ std::ostream& operator<<(std::ostream& os, SyncCout sc) { return os; } +void sync_cout_start() { std::cout << IO_LOCK; } +void sync_cout_end() { std::cout << IO_UNLOCK; } // Trampoline helper to avoid moving Logger to misc.h void start_logger(const std::string& fname) { Logger::start(fname); } @@ -419,6 +421,10 @@ void remove_whitespace(std::string& s) { s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return std::isspace(c); }), s.end()); } +bool is_whitespace(const std::string& s) { + return std::all_of(s.begin(), s.end(), [](char c) { return std::isspace(c); }); +} + std::string CommandLine::get_binary_directory(std::string argv0) { std::string pathSeparator; diff --git a/src/misc.h b/src/misc.h index 557a4d8c..bdc7c864 100644 --- a/src/misc.h +++ b/src/misc.h @@ -101,6 +101,7 @@ inline std::vector 
split(const std::string& s, const std::string& d } void remove_whitespace(std::string& s); +bool is_whitespace(const std::string& s); enum SyncCout { IO_LOCK, @@ -111,6 +112,9 @@ std::ostream& operator<<(std::ostream&, SyncCout); #define sync_cout std::cout << IO_LOCK #define sync_endl std::endl << IO_UNLOCK +void sync_cout_start(); +void sync_cout_end(); + // True if and only if the binary is compiled on a little-endian machine static inline const union { uint32_t i; diff --git a/src/uci.cpp b/src/uci.cpp index 75b7dfc7..4bc358d8 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -48,19 +48,25 @@ struct overload: Ts... { template overload(Ts...) -> overload; +void UCIEngine::print_info_string(const std::string& str) { + sync_cout_start(); + for (auto& line : split(str, "\n")) + { + if (!is_whitespace(line)) + { + std::cout << "info string " << line << '\n'; + } + } + sync_cout_end(); +} + UCIEngine::UCIEngine(int argc, char** argv) : engine(argv[0]), cli(argc, argv) { engine.get_options().add_info_listener([](const std::optional& str) { - if (!str || (*str).empty()) - return; - - // split all lines - auto ss = std::istringstream{*str}; - - for (std::string line; std::getline(ss, line, '\n');) - sync_cout << "info string " << line << sync_endl; + if (str.has_value()) + print_info_string(*str); }); engine.set_on_iter([](const auto& i) { on_iter(i); }); @@ -102,9 +108,8 @@ void UCIEngine::loop() { sync_cout << "id name " << engine_info(true) << "\n" << engine.get_options() << sync_endl; - sync_cout << "info string " << engine.numa_config_information_as_string() << sync_endl; - sync_cout << "info string " << engine.thread_binding_information_as_string() - << sync_endl; + print_info_string(engine.numa_config_information_as_string()); + print_info_string(engine.thread_binding_information_as_string()); sync_cout << "uciok" << sync_endl; } diff --git a/src/uci.h b/src/uci.h index 122bcc40..23745f96 100644 --- a/src/uci.h +++ b/src/uci.h @@ -58,6 +58,8 @@ class UCIEngine { 
Engine engine; CommandLine cli; + static void print_info_string(const std::string& str); + void go(std::istringstream& is); void bench(std::istream& args); void position(std::istringstream& is); From 44cddbd962c738678f407a7414efa5b93f0710d9 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Thu, 13 Jun 2024 18:43:19 +0200 Subject: [PATCH 128/315] Add matetrack to CI verifies that all mate PVs printed for finished iterations (i.e. no lower or upper bounds), are complete, i.e. of the expected length and ending in mate, and do not contain drawing or illegal moves. based on a set of 2000 positions and the code in https://github.com/vondele/matetrack closes https://github.com/official-stockfish/Stockfish/pull/5390 No functional change --- .github/workflows/matetrack.yml | 36 +++++++++++++++++++++++++++++++++ .github/workflows/stockfish.yml | 2 ++ 2 files changed, 38 insertions(+) create mode 100644 .github/workflows/matetrack.yml diff --git a/.github/workflows/matetrack.yml b/.github/workflows/matetrack.yml new file mode 100644 index 00000000..dd81f334 --- /dev/null +++ b/.github/workflows/matetrack.yml @@ -0,0 +1,36 @@ +# This workflow will run matetrack on the PR + +name: Matetrack +on: + workflow_call: +jobs: + Matetrack: + name: Matetrack + runs-on: ubuntu-22.04 + steps: + - name: Checkout SF repo + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + path: Stockfish + + - name: build SF + working-directory: Stockfish/src + run: make -j profile-build + + - name: Checkout matetrack repo + uses: actions/checkout@v4 + with: + repository: vondele/matetrack + path: matetrack + ref: 20287a1a145f30a166b7ef251eddb611e4e44fbf + + - name: matetrack install deps + working-directory: matetrack + run: pip install -r requirements.txt + + - name: Run matetrack + working-directory: matetrack + run: | + python matecheck.py --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --nodes 100000 | tee 
matecheckout.out + ! grep "issues were detected" matecheckout.out > /dev/null diff --git a/.github/workflows/stockfish.yml b/.github/workflows/stockfish.yml index 13d57f9e..fcaa3f6b 100644 --- a/.github/workflows/stockfish.yml +++ b/.github/workflows/stockfish.yml @@ -90,6 +90,8 @@ jobs: uses: ./.github/workflows/sanitizers.yml Tests: uses: ./.github/workflows/tests.yml + Matetrack: + uses: ./.github/workflows/matetrack.yml Binaries: if: github.repository == 'official-stockfish/Stockfish' needs: [Matrix, Prerelease, Compilation] From b01fdb596a196f966549f7132c042ab67962fbbd Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 12 Jun 2024 13:23:26 +0200 Subject: [PATCH 129/315] Fix upperbound/lowerbound output in multithreaded case In case a stop is received during multithreaded searches, the PV of the best thread might be printed without the correct upperbound/lowerbound indicators. This was due to the pvIdx variable being incremented after receiving the stop. passed STC: https://tests.stockfishchess.org/tests/view/666985da602682471b064d08 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 196576 W: 51039 L: 50996 D: 94541 Ptnml(0-2): 760, 22545, 51603, 22652, 728 closes https://github.com/official-stockfish/Stockfish/pull/5391 Bench: 1160467 --- src/search.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 91b3c789..af0ab400 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -299,7 +299,7 @@ void Search::Worker::iterative_deepening() { searchAgainCounter++; // MultiPV loop. We perform a full root search for each PV line - for (pvIdx = 0; pvIdx < multiPV && !threads.stop; ++pvIdx) + for (pvIdx = 0; pvIdx < multiPV; ++pvIdx) { if (pvIdx == pvLast) { @@ -390,6 +390,9 @@ void Search::Worker::iterative_deepening() { // below pick a proven score/PV for this thread (from the previous iteration). 
&& !(threads.abortedSearch && rootMoves[0].uciScore <= VALUE_TB_LOSS_IN_MAX_PLY)) main_manager()->pv(*this, threads, tt, rootDepth); + + if (threads.stop) + break; } if (!threads.stop) From ff10f4ac6516d691b5a48788bc7b21d0ecd83b03 Mon Sep 17 00:00:00 2001 From: Dubslow Date: Wed, 12 Jun 2024 03:14:55 -0500 Subject: [PATCH 130/315] Fix readability of TTEntry occupancy check Passed STC: https://tests.stockfishchess.org/tests/view/66695b6a602682471b064cfc LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 107520 W: 28138 L: 27998 D: 51384 Ptnml(0-2): 373, 12257, 28358, 12401, 371 closes https://github.com/official-stockfish/Stockfish/pull/5394 No functional change --- src/tt.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/tt.cpp b/src/tt.cpp index 763e2c9b..30104ab7 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -55,6 +55,7 @@ struct TTEntry { Bound(genBound8 & 0x3), bool(genBound8 & 0x4)}; } + bool is_occupied() const; void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8); // The returned age is a multiple of TranspositionTable::GENERATION_DELTA uint8_t relative_age(const uint8_t generation8) const; @@ -84,7 +85,8 @@ static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; // DEPTH_ENTRY_OFFSET exists because 1) we use `bool(depth8)` as the occupancy check, but // 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits": -// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted below.) +// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted in `save`.) +bool TTEntry::is_occupied() const { return bool(depth8); } // Populates the TTEntry with a new node's data, possibly // overwriting an old position. The update is not atomic and can be racy. 
@@ -196,7 +198,7 @@ int TranspositionTable::hashfull() const { int cnt = 0; for (int i = 0; i < 1000; ++i) for (int j = 0; j < ClusterSize; ++j) - cnt += table[i].entry[j].depth8 + cnt += table[i].entry[j].is_occupied() && (table[i].entry[j].genBound8 & GENERATION_MASK) == generation8; return cnt / ClusterSize; @@ -227,7 +229,7 @@ std::tuple TranspositionTable::probe(const Key key) cons if (tte[i].key16 == key16) // This gap is the main place for read races. // After `read()` completes that copy is final, but may be self-inconsistent. - return {bool(tte[i].depth8), tte[i].read(), TTWriter(&tte[i])}; + return {tte[i].is_occupied(), tte[i].read(), TTWriter(&tte[i])}; // Find an entry to be replaced according to the replacement strategy TTEntry* replace = tte; From 2046c92ad461f5e852ba62a144b53c3d3fea04b0 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Wed, 12 Jun 2024 14:04:43 +0300 Subject: [PATCH 131/315] Tweak the reduction formula Tweak the reduction formula if position is or has been on the PV Taking inspiration from an old Viren test. 
Passed STC: LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 78528 W: 20607 L: 20225 D: 37696 Ptnml(0-2): 262, 9297, 19785, 9637, 283 https://tests.stockfishchess.org/tests/view/666339c70ff7cb4868d1fe24 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 138630 W: 35666 L: 35132 D: 67832 Ptnml(0-2): 118, 15345, 37835, 15919, 98 https://tests.stockfishchess.org/tests/view/66645dec0612cd151f9e77b0 closes https://github.com/official-stockfish/Stockfish/pull/5385 Bench: 1134281 --- src/search.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index af0ab400..8fb65fe7 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1134,7 +1134,8 @@ moves_loop: // When in check, search starts here // Decrease reduction if position is or has been on the PV (~7 Elo) if (ss->ttPv) - r -= 1 + (ttData.value > alpha) + (ttData.depth >= depth); + r -= 1 + (ttData.value > alpha) + (ttData.depth >= depth) + - (PvNode && ttData.value < alpha && ttData.depth >= depth); // Decrease reduction for PvNodes (~0 Elo on STC, ~2 Elo on LTC) if (PvNode) From 2678606e8dbeac8332909f0b3e43638936570835 Mon Sep 17 00:00:00 2001 From: xoto10 <23479932+xoto10@users.noreply.github.com> Date: Fri, 14 Jun 2024 17:27:09 +0100 Subject: [PATCH 132/315] Consider wider range of moves near leaves. try to avoid missing good moves for opponent or engine, by updating bestMove also when value == bestValue (i.e. value == alpha) under certain conditions. In particular require this is at higher depth in the tree, leaving the logic near the root unchanged, and only apply randomly. Avoid doing this near mate scores, leaving mate PVs intact. 
Passed SMP STC 6+0.06 th7 : LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 42040 W: 10930 L: 10624 D: 20486 Ptnml(0-2): 28, 4682, 11289, 4998, 23 https://tests.stockfishchess.org/tests/view/66608b00c340c8eed7757d1d Passed SMP LTC 24+0.24 th7 : LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 73692 W: 18978 L: 18600 D: 36114 Ptnml(0-2): 9, 7421, 21614, 7787, 15 https://tests.stockfishchess.org/tests/view/666095e8c340c8eed7757d49 closes https://github.com/official-stockfish/Stockfish/pull/5367 Bench 1205168 --- src/search.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 8fb65fe7..75eea2fd 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1283,11 +1283,17 @@ moves_loop: // When in check, search starts here rm.score = -VALUE_INFINITE; } - if (value > bestValue) + // In case we have an alternative move equal in eval to the current bestmove, + // promote it to bestmove by pretending it just exceeds alpha (but not beta). + int inc = (value == bestValue && (int(nodes) & 15) == 0 + && ss->ply + 2 + ss->ply / 32 >= thisThread->rootDepth + && std::abs(value) + 1 < VALUE_TB_WIN_IN_MAX_PLY); + + if (value + inc > bestValue) { bestValue = value; - if (value > alpha) + if (value + inc > alpha) { bestMove = move; From 5514690f8e19631054271a6ca7e1cbfaf1b443f2 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sat, 22 Jun 2024 09:17:45 +0200 Subject: [PATCH 133/315] CI/CD: play games this action plays games under fast-chess with a `debug=yes` compiled binary. It checks for triggered asserts in the code, or generally for engine disconnects. 
closes https://github.com/official-stockfish/Stockfish/pull/5403 No functional change --- .github/workflows/games.yml | 41 +++++++++++++++++++++++++++++++++ .github/workflows/stockfish.yml | 2 ++ 2 files changed, 43 insertions(+) create mode 100644 .github/workflows/games.yml diff --git a/.github/workflows/games.yml b/.github/workflows/games.yml new file mode 100644 index 00000000..088695e5 --- /dev/null +++ b/.github/workflows/games.yml @@ -0,0 +1,41 @@ +# This workflow will play games with a debug enabled SF using the PR + +name: Games +on: + workflow_call: +jobs: + Matetrack: + name: Games + runs-on: ubuntu-22.04 + steps: + - name: Checkout SF repo + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + path: Stockfish + + - name: build debug enabled version of SF + working-directory: Stockfish/src + run: make -j build debug=yes + + - name: Checkout fast-chess repo + uses: actions/checkout@v4 + with: + repository: Disservin/fast-chess + path: fast-chess + ref: d54af1910d5479c669dc731f1f54f9108a251951 + + - name: fast-chess build + working-directory: fast-chess + run: make -j + + - name: Run games + working-directory: fast-chess + run: | + ./fast-chess -rounds 4 -games 2 -repeat -concurrency 4 -openings file=app/tests/data/openings.epd format=epd order=random -srand $RANDOM\ + -engine name=sf1 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\ + -engine name=sf2 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\ + -ratinginterval 1 -report penta=true -each proto=uci tc=4+0.04 -log file=fast.log | tee fast.out + cat fast.log + ! grep "Assertion" fast.log > /dev/null + ! 
grep "disconnect" fast.out > /dev/null diff --git a/.github/workflows/stockfish.yml b/.github/workflows/stockfish.yml index fcaa3f6b..8a1094fb 100644 --- a/.github/workflows/stockfish.yml +++ b/.github/workflows/stockfish.yml @@ -92,6 +92,8 @@ jobs: uses: ./.github/workflows/tests.yml Matetrack: uses: ./.github/workflows/matetrack.yml + Games: + uses: ./.github/workflows/games.yml Binaries: if: github.repository == 'official-stockfish/Stockfish' needs: [Matrix, Prerelease, Compilation] From 8806a58ebf5ade73696fd1f89ac4ea12cd1eedd3 Mon Sep 17 00:00:00 2001 From: evqsx <149484438+evqsx@users.noreply.github.com> Date: Sun, 16 Jun 2024 12:34:24 +0800 Subject: [PATCH 134/315] Simplify static exchange evaluation pruning formula Passed STC: https://tests.stockfishchess.org/tests/view/666bda31602682471b064e1f LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 141696 W: 36932 L: 36826 D: 67938 Ptnml(0-2): 510, 16880, 35989, 16932, 537 Passed LTC: https://tests.stockfishchess.org/tests/view/666e6b67602682471b064f4b LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 159504 W: 40552 L: 40471 D: 78481 Ptnml(0-2): 130, 18160, 43103, 18217, 142 closes https://github.com/official-stockfish/Stockfish/pull/5400 bench: 1084115 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 75eea2fd..9b296e7f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1586,7 +1586,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, // If static exchange evaluation is much worse than what is needed to not // fall below alpha we can prune this move. 
- if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 2 - 30)) + if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 4)) { bestValue = alpha; continue; From d5c130569b364899fc151101d069291a8934789a Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sun, 16 Jun 2024 16:14:22 -0700 Subject: [PATCH 135/315] Simplify Bonus Formula In History Adjustment Inspired by a discord message [1] from Vizvezdenec, this patch simplifies the bonus adjustment bonus = bonus > 0 ? 2 * bonus : bonus / 2 to a constant addition, maintaining bonus average at around 0 in regular bench. As cj5716 pointed in discord [2], the constant bonus can also be considered as factoring tempo when calculating bonus, yielding a better value of the move. [1] https://discord.com/channels/435943710472011776/882956631514689597/1243877089443188776 [2] https://discord.com/channels/435943710472011776/813919248455827515/1252277437249622077 Passed Non-regression STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 29984 W: 7908 L: 7677 D: 14399 Ptnml(0-2): 95, 3502, 7594, 3679, 122 https://tests.stockfishchess.org/tests/view/666f7210602682471b064fa2 Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 170136 W: 43214 L: 43145 D: 83777 Ptnml(0-2): 158, 19185, 46311, 19258, 156 https://tests.stockfishchess.org/tests/view/666fb32e602682471b064fb5 closes https://github.com/official-stockfish/Stockfish/pull/5401 bench 1438375 --- src/search.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 9b296e7f..562bdbf9 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -750,8 +750,7 @@ Value Search::Worker::search( // Use static evaluation difference to improve quiet move ordering (~9 Elo) if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-10 * int((ss - 1)->staticEval + ss->staticEval), -1590, 1371); - bonus = bonus > 0 ? 
2 * bonus : bonus / 2; + int bonus = std::clamp(-10 * int((ss - 1)->staticEval + ss->staticEval), -1590, 1371) + 800; thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] From cc992e5e4a7110b21f85168bdedad7978edad140 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Mon, 17 Jun 2024 00:03:15 +0300 Subject: [PATCH 136/315] Internal iterative reductions: decrease depth more For PV nodes without a ttMove, we decrease depth. But in this patch, additionally, if the current position is found in the TT, and the stored depth in the TT is greater than or equal to the current search depth, we decrease the search depth even further. Passed STC: LLR: 2.96 (-2.94,2.94) <0.00,2.00> Total: 84384 W: 22154 L: 21761 D: 40469 Ptnml(0-2): 292, 9972, 21315, 10277, 336 https://tests.stockfishchess.org/tests/view/666b0a4d602682471b064db6 Passed LTC: LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 92106 W: 23471 L: 23032 D: 45603 Ptnml(0-2): 79, 10155, 25154, 10578, 87 https://tests.stockfishchess.org/tests/view/666c423d602682471b064e56 closes https://github.com/official-stockfish/Stockfish/pull/5397 bench: 1038234 --- src/search.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 562bdbf9..e63595c1 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -829,9 +829,12 @@ Value Search::Worker::search( } // Step 10. Internal iterative reductions (~9 Elo) - // For PV nodes without a ttMove, we decrease depth by 3. + // For PV nodes without a ttMove, we decrease depth. + // Additionally, if the current position is found in the TT + // and the stored depth in the TT is greater than or equal to + // current search depth, we decrease search depth even further. 
if (PvNode && !ttData.move) - depth -= 3; + depth -= 3 + (ss->ttHit && ttData.depth >= depth); // Use qsearch if depth <= 0. if (depth <= 0) From 5fbfd06171cadf97e6e8173216046b099ebfa43b Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sun, 23 Jun 2024 21:53:25 +0200 Subject: [PATCH 137/315] Move info output after uciok fixes #5393 : an incompatibility with an older GUI (Chesspartner) fixes #5396 : an incompatibility with an older GUI (Fritz9) closes https://github.com/official-stockfish/Stockfish/pull/5404 No functional change --- src/uci.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/uci.cpp b/src/uci.cpp index 4bc358d8..3c9177ee 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -108,10 +108,11 @@ void UCIEngine::loop() { sync_cout << "id name " << engine_info(true) << "\n" << engine.get_options() << sync_endl; + sync_cout << "uciok" << sync_endl; + + // keep info strings after uciok for old GUIs print_info_string(engine.numa_config_information_as_string()); print_info_string(engine.thread_binding_information_as_string()); - - sync_cout << "uciok" << sync_endl; } else if (token == "setoption") From b2a12917e2125fcd1e1c344165e840b0756201a8 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Mon, 24 Jun 2024 17:12:07 +0300 Subject: [PATCH 138/315] Remove redundant inline constexpr implies inline anyway closes https://github.com/official-stockfish/Stockfish/pull/5406 No functional change --- src/misc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.cpp b/src/misc.cpp index e97d58b9..26dd3a28 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -281,7 +281,7 @@ template struct DebugInfo { std::atomic data[N] = {0}; - constexpr inline std::atomic& operator[](int index) { return data[index]; } + constexpr std::atomic& operator[](int index) { return data[index]; } }; DebugInfo<2> hit[MaxDebugSlots]; From 66e6274d32e9a59b6d0d8c347a0f1ee8175ffcdc Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Mon, 1 Jul 2024
19:44:00 +0200 Subject: [PATCH 139/315] Fix typos in comments closes https://github.com/official-stockfish/Stockfish/pull/5409 No functional change --- src/thread.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/thread.h b/src/thread.h index 7416271b..81ca39bb 100644 --- a/src/thread.h +++ b/src/thread.h @@ -39,10 +39,10 @@ namespace Stockfish { class OptionsMap; using Value = int; -// Sometimes we don't want to actually bind the threads, but the recipent still +// Sometimes we don't want to actually bind the threads, but the recipient still // needs to think it runs on *some* NUMA node, such that it can access structures // that rely on NUMA node knowledge. This class encapsulates this optional process -// such that the recipent does not need to know whether the binding happened or not. +// such that the recipient does not need to know whether the binding happened or not. class OptionalThreadToNumaNodeBinder { public: OptionalThreadToNumaNodeBinder(NumaIndex n) : @@ -87,7 +87,7 @@ class Thread { // this name is no longer correct. However, this class (and ThreadPool) // require further work to make them properly generic while maintaining // appropriate specificity regarding search, from the point of view of an - // outside user, so renaming of this function in left for whenever that happens. + // outside user, so renaming of this function is left for whenever that happens. 
void wait_for_search_finished(); size_t id() const { return idx; } From 22a502ac7486576f52d7ba6cf884702162e92400 Mon Sep 17 00:00:00 2001 From: Taras Vuk <117687515+TarasVuk@users.noreply.github.com> Date: Tue, 25 Jun 2024 10:48:50 +0200 Subject: [PATCH 140/315] Skip futility pruning if beta is below TB loss value Passed STC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 77024 W: 20122 L: 19946 D: 36956 Ptnml(0-2): 278, 8754, 20277, 8920, 283 https://tests.stockfishchess.org/tests/view/66752d59602682471b0652f3 Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 93114 W: 23623 L: 23477 D: 46014 Ptnml(0-2): 77, 9839, 26566, 10011, 64 https://tests.stockfishchess.org/tests/view/6676b3e1602682471b065395 closes https://github.com/official-stockfish/Stockfish/pull/5413 bench: 1003441 --- src/search.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index e63595c1..d04ba194 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -784,8 +784,9 @@ Value Search::Worker::search( && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - (ss - 1)->statScore / 263 >= beta - && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttData.move || ttCapture)) - return beta > VALUE_TB_LOSS_IN_MAX_PLY ? beta + (eval - beta) / 3 : eval; + && eval >= beta && (!ttData.move || ttCapture) && beta > VALUE_TB_LOSS_IN_MAX_PLY + && eval < VALUE_TB_WIN_IN_MAX_PLY) + return beta + (eval - beta) / 3; // Step 9. Null move search with verification search (~35 Elo) if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 14369 From 90eca83e7f40ca719cd49e487893f32598ae6f19 Mon Sep 17 00:00:00 2001 From: mstembera Date: Sat, 29 Jun 2024 17:18:39 -0700 Subject: [PATCH 141/315] Simplify away a useless TTEntry::read() Not needed when we don't hit an entry.
closes https://github.com/official-stockfish/Stockfish/pull/5416 No functional change --- src/tt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tt.cpp b/src/tt.cpp index 30104ab7..4b55e53f 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -238,7 +238,7 @@ std::tuple TranspositionTable::probe(const Key key) cons > tte[i].depth8 - tte[i].relative_age(generation8) * 2) replace = &tte[i]; - return {false, replace->read(), TTWriter(replace)}; + return {false, TTData(), TTWriter(replace)}; } From 91ec31dac430e1d587f8239f2377ffb796008f8a Mon Sep 17 00:00:00 2001 From: Daniel Monroe <39802758+Ergodice@users.noreply.github.com> Date: Sat, 29 Jun 2024 21:23:41 -0400 Subject: [PATCH 142/315] Grade countermove bonus for low statscores Passed STC: LLR: 2.96 (-2.94,2.94) <0.00,2.00> Total: 338592 W: 88396 L: 87627 D: 162569 Ptnml(0-2): 1161, 40201, 85788, 41000, 1146 https://tests.stockfishchess.org/tests/view/6679d40c0c2db3fa2dcecbcc Passed LTC: LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 83526 W: 21429 L: 21010 D: 41087 Ptnml(0-2): 54, 9173, 22913, 9546, 77 https://tests.stockfishchess.org/tests/view/667c5f2980450dba965911fc closes https://github.com/official-stockfish/Stockfish/pull/5418 bench: 1489815 --- src/search.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index d04ba194..81bb9a06 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1355,10 +1355,16 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (113 * (depth > 5) + 118 * (PvNode || cutNode) - + 191 * ((ss - 1)->statScore < -14396) + 119 * ((ss - 1)->moveCount > 8) + int bonus = (113 * (depth > 5) + 118 * (PvNode || cutNode) + 119 * ((ss - 1)->moveCount > 8) + 64 * (!ss->inCheck && bestValue <= ss->staticEval - 107) + 147 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 75)); + + + // proportional to 
"how much damage we have to undo" + if ((ss - 1)->statScore < -8000) + bonus += std::clamp(-(ss - 1)->statScore / 100, 0, 250); + + update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus / 100); thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] From 7b49f9dd7091ce1d075ebdd16fff85ff1dba31fa Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sun, 30 Jun 2024 12:47:04 +0300 Subject: [PATCH 143/315] Tweak multicut This patch is an original patch by author of Altair (https://github.com/Alex2262/AltairChessEngine) chess engine. It allows to produce more aggressive multicut compared to master by changing condition it needs to fulfil and also returns bigger value. Also has applied matetrack fix on top. Passed STC: https://tests.stockfishchess.org/tests/view/667223ab602682471b0650e2 LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 50048 W: 13200 L: 12860 D: 23988 Ptnml(0-2): 181, 5822, 12679, 6160, 182 Passed LTC: https://tests.stockfishchess.org/tests/view/6672f777602682471b06515d LLR: 2.97 (-2.94,2.94) <0.50,2.50> Total: 706380 W: 179707 L: 177981 D: 348692 Ptnml(0-2): 656, 79250, 191665, 80950, 669 closes https://github.com/official-stockfish/Stockfish/pull/5421 bench 1148966 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 81bb9a06..da01f82f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1087,8 +1087,8 @@ moves_loop: // When in check, search starts here // and if after excluding the ttMove with a reduced search we fail high over the original beta, // we assume this expected cut-node is not singular (multiple moves fail high), // and we can prune the whole subtree by returning a softbound. 
- else if (singularBeta >= beta) - return singularBeta; + else if (value >= beta && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) + return value; // Negative extensions // If other moves failed high over (ttValue - margin) without the ttMove on a reduced search, From 38c5fc33e493f210dc199dab7c105e84e7601b99 Mon Sep 17 00:00:00 2001 From: "Shahin M. Shahin" Date: Sun, 30 Jun 2024 16:32:20 +0300 Subject: [PATCH 144/315] Increase reduction based on correct expectation If the current node is not a cutNode then it means that the child is one in LMR and the cutoff count is expected, so more reduction when the cutoffs are expected Passed STC: https://tests.stockfishchess.org/tests/view/66815e791c5b344a34ca7090 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 64416 W: 16876 L: 16519 D: 31021 Ptnml(0-2): 150, 7670, 16264, 7921, 203 Passed LTC: https://tests.stockfishchess.org/tests/view/668162f61c5b344a34ca725c LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 78186 W: 19905 L: 19499 D: 38782 Ptnml(0-2): 55, 8561, 21437, 9003, 37 closes https://github.com/official-stockfish/Stockfish/pull/5422 bench: 1161531 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index da01f82f..b68b3026 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1157,7 +1157,7 @@ moves_loop: // When in check, search starts here // Increase reduction if next ply has a lot of fail high (~5 Elo) if ((ss + 1)->cutoffCnt > 3) - r++; + r += 1 + !(PvNode || cutNode); // For first picked move (ttMove) reduce reduction // but never allow it to go below 0 (~3 Elo) From 5deb26239340a6a1a91d1c2050f90b7a36f9f5d1 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sun, 30 Jun 2024 22:24:28 +0300 Subject: [PATCH 145/315] Simplify rm.averageScore calculation Passed STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 485056 W: 125222 L: 125497 D: 234337 Ptnml(0-2): 1384, 58197, 123614, 57976, 1357 https://tests.stockfishchess.org/tests/view/6681816d442423e54714133f Passed LTC: 
LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 56622 W: 14301 L: 14115 D: 28206 Ptnml(0-2): 31, 6259, 15538, 6459, 24 https://tests.stockfishchess.org/tests/view/6681a9a5596d543edc677490 closes https://github.com/official-stockfish/Stockfish/pull/5423 bench: 1171203 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index b68b3026..f561b183 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1246,7 +1246,7 @@ moves_loop: // When in check, search starts here rm.effort += nodes - nodeCount; rm.averageScore = - rm.averageScore != -VALUE_INFINITE ? (2 * value + rm.averageScore) / 3 : value; + rm.averageScore != -VALUE_INFINITE ? (value + rm.averageScore) / 2 : value; // PV move or new best move? if (moveCount == 1 || value > alpha) From f6842a145cf59176abc229928b94404543daa250 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sun, 30 Jun 2024 11:43:36 -0400 Subject: [PATCH 146/315] Simplify worsening deduction in futility margin Passed non-regression STC: https://tests.stockfishchess.org/tests/view/66817d46442423e547141226 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 345408 W: 89146 L: 89266 D: 166996 Ptnml(0-2): 954, 41317, 88286, 41189, 958 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/66818dbe1e90a146232d1f62 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 173214 W: 43821 L: 43755 D: 85638 Ptnml(0-2): 108, 19407, 47492, 19511, 89 closes https://github.com/official-stockfish/Stockfish/pull/5424 bench 981017 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index f561b183..52eefdc9 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -62,7 +62,7 @@ static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { Value futilityMult = 109 - 40 * noTtCutNode; Value improvingDeduction = 59 * improving * futilityMult / 32; - Value 
worseningDeduction = 328 * oppWorsening * futilityMult / 1024; + Value worseningDeduction = oppWorsening * futilityMult / 3; return futilityMult * d - improvingDeduction - worseningDeduction; } From 843b6f7c9873d86742cf9b6ce3523f2c5dc69d2a Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sun, 30 Jun 2024 17:00:49 -0400 Subject: [PATCH 147/315] Update some params for pruning at shallow depth Values found around 82k / 120k spsa games at 60+0.6: https://tests.stockfishchess.org/tests/view/6681aca4481148df247298bd Passed STC: https://tests.stockfishchess.org/tests/view/6681c795c1657e386d2948fa LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 145216 W: 37595 L: 37122 D: 70499 Ptnml(0-2): 375, 17122, 37185, 17507, 419 Passed LTC: https://tests.stockfishchess.org/tests/view/6681d4eec1657e386d2949e0 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 154062 W: 39117 L: 38557 D: 76388 Ptnml(0-2): 67, 16874, 42608, 17396, 86 closes https://github.com/official-stockfish/Stockfish/pull/5425 bench 996419 --- src/search.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 52eefdc9..2e8d47cf 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -994,7 +994,7 @@ moves_loop: // When in check, search starts here // Futility pruning for captures (~2 Elo) if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { - Value futilityValue = ss->staticEval + 287 + 248 * lmrDepth + Value futilityValue = ss->staticEval + 294 + 246 * lmrDepth + PieceValue[capturedPiece] + captHist / 7; if (futilityValue <= alpha) continue; @@ -1002,7 +1002,7 @@ moves_loop: // When in check, search starts here // SEE based pruning for captures and checks (~11 Elo) int seeHist = std::clamp(captHist / 32, -180 * depth, 163 * depth); - if (!pos.see_ge(move, -160 * depth - seeHist)) + if (!pos.see_ge(move, -163 * depth - seeHist)) continue; } else @@ -1013,15 +1013,15 @@ moves_loop: // When in check, search starts here + 
thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) - if (lmrDepth < 6 && history < -4151 * depth) + if (lmrDepth < 6 && history < -3899 * depth) continue; history += 2 * thisThread->mainHistory[us][move.from_to()]; - lmrDepth += history / 3678; + lmrDepth += history / 4040; Value futilityValue = - ss->staticEval + (bestValue < ss->staticEval - 51 ? 138 : 54) + 140 * lmrDepth; + ss->staticEval + (bestValue < ss->staticEval - 51 ? 135 : 56) + 140 * lmrDepth; // Futility pruning: parent node (~13 Elo) if (!ss->inCheck && lmrDepth < 12 && futilityValue <= alpha) From 6138a0fd0e43753a86e4a170a5f6e2b7b6752677 Mon Sep 17 00:00:00 2001 From: Dubslow Date: Sun, 30 Jun 2024 19:22:04 -0400 Subject: [PATCH 148/315] Probcut in check no matter if pv or capture Passed STC: https://tests.stockfishchess.org/tests/view/6681e9c8c1657e386d294cef LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 217824 W: 56149 L: 56129 D: 105546 Ptnml(0-2): 587, 25926, 55848, 25982, 569 Passed LTC: https://tests.stockfishchess.org/tests/view/6681fcb8c1657e386d294db1 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 357552 W: 90546 L: 90671 D: 176335 Ptnml(0-2): 207, 40064, 98362, 39933, 210 Each half of this also passed STC+LTC separately closes https://github.com/official-stockfish/Stockfish/pull/5427 bench 1227870 --- src/search.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 2e8d47cf..31278241 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -906,9 +906,8 @@ moves_loop: // When in check, search starts here // Step 12. 
A small Probcut idea, when we are in check (~4 Elo) probCutBeta = beta + 388; - if (ss->inCheck && !PvNode && ttCapture && (ttData.bound & BOUND_LOWER) - && ttData.depth >= depth - 4 && ttData.value >= probCutBeta - && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY + if (ss->inCheck && (ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 + && ttData.value >= probCutBeta && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) return probCutBeta; From 69ad4667fb40cc0d7195f9fa20652903813d698c Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sun, 30 Jun 2024 22:04:51 -0700 Subject: [PATCH 149/315] Do Capture History Updates In Probcut This patch introduces history updates to probcut. Standard depth - 3 bonus and maluses are given to the capture that caused fail high and previously searched captures, respectively. Similar to #5243, a negative history fill is applied to compensate for an increase in capture history average, thus improving the scaling of this patch. 
Passed STC: LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 84832 W: 21941 L: 21556 D: 41335 Ptnml(0-2): 226, 9927, 21688, 10386, 189 https://tests.stockfishchess.org/tests/view/6682fab9389b9ee542b1d029 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 104298 W: 26469 L: 26011 D: 51818 Ptnml(0-2): 43, 11458, 28677, 11940, 31 https://tests.stockfishchess.org/tests/view/6682ff06389b9ee542b1d0a0 closes https://github.com/official-stockfish/Stockfish/pull/5428 bench 1281351 --- src/search.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 31278241..188e81f4 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -502,7 +502,7 @@ void Search::Worker::iterative_deepening() { void Search::Worker::clear() { counterMoves.fill(Move::none()); mainHistory.fill(0); - captureHistory.fill(0); + captureHistory.fill(-700); pawnHistory.fill(-1193); correctionHistory.fill(0); @@ -862,12 +862,19 @@ Value Search::Worker::search( assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta); MovePicker mp(pos, ttData.move, probCutBeta - ss->staticEval, &thisThread->captureHistory); + Move probcutCapturesSearched[32]; + int probcutCaptureCount = 0; + Piece captured; while ((move = mp.next_move()) != Move::none()) if (move != excludedMove && pos.legal(move)) { assert(pos.capture_stage(move)); + movedPiece = pos.moved_piece(move); + captured = pos.piece_on(move.to_sq()); + + // Prefetch the TT entry for the resulting position prefetch(tt.first_entry(pos.key_after(move))); @@ -891,12 +898,28 @@ Value Search::Worker::search( if (value >= probCutBeta) { + thisThread->captureHistory[movedPiece][move.to_sq()][type_of(captured)] + << stat_bonus(depth - 2); + + for (int i = 0; i < probcutCaptureCount; i++) + { + movedPiece = pos.moved_piece(probcutCapturesSearched[i]); + captured = pos.piece_on(probcutCapturesSearched[i].to_sq()); + + thisThread->captureHistory[movedPiece][probcutCapturesSearched[i].to_sq()] + 
[type_of(captured)] + << -stat_malus(depth - 3); + } + // Save ProbCut data into transposition table ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, depth - 3, move, unadjustedStaticEval, tt.generation()); return std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY ? value - (probCutBeta - beta) : value; } + + if (probcutCaptureCount < 32) + probcutCapturesSearched[probcutCaptureCount++] = move; } Eval::NNUE::hint_common_parent_position(pos, networks[numaAccessToken], refreshTable); From 6b7822119feffd0a27ae5b2a95d3570c9e046090 Mon Sep 17 00:00:00 2001 From: "Shahin M. Shahin" Date: Tue, 25 Jun 2024 01:57:35 +0300 Subject: [PATCH 150/315] Limit has_game_cycle() to only upcoming repetition use the original algorithm according to the paper http://web.archive.org/web/20201107002606/https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf, which detects accurately if a position has an upcoming repetition. The 'no progress' part of has_game_cycle has been removed, the function has been renamed to upcoming_repetition to reflect this. As a result of this fix, to the best of our knowledge, all PVs for completed iterations that yield a mate or decisive table base score now end in mate or contain a TB position, respectively. 
passed non-regression STC: https://tests.stockfishchess.org/tests/view/6679fa1d0c2db3fa2dcecbf2 LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 63584 W: 16666 L: 16472 D: 30446 Ptnml(0-2): 186, 7552, 16146, 7698, 210 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/667ac965e439ed1c7a9ca042 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 464574 W: 117493 L: 117729 D: 229352 Ptnml(0-2): 311, 52468, 126974, 52214, 320 closes https://github.com/official-stockfish/Stockfish/pull/5432 bench: 1209805 --- src/position.cpp | 20 ++++++++++---------- src/position.h | 2 +- src/search.cpp | 10 ++++------ 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/position.cpp b/src/position.cpp index b46ba029..d374b1c0 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -1156,9 +1156,9 @@ bool Position::has_repeated() const { } -// Tests if the position has a move which draws by repetition, -// or an earlier position has a move that directly reaches the current position. -bool Position::has_game_cycle(int ply) const { +// Tests if the position has a move which draws by repetition. +// This function accurately matches the outcome of is_draw() over all legal moves. +bool Position::upcoming_repetition(int ply) const { int j; @@ -1169,10 +1169,16 @@ bool Position::has_game_cycle(int ply) const { Key originalKey = st->key; StateInfo* stp = st->previous; + Key other = originalKey ^ stp->key ^ Zobrist::side; for (int i = 3; i <= end; i += 2) { - stp = stp->previous->previous; + stp = stp->previous; + other ^= stp->key ^ stp->previous->key ^ Zobrist::side; + stp = stp->previous; + + if (other != 0) + continue; Key moveKey = originalKey ^ stp->key; if ((j = H1(moveKey), cuckoo[j] == moveKey) || (j = H2(moveKey), cuckoo[j] == moveKey)) @@ -1188,12 +1194,6 @@ bool Position::has_game_cycle(int ply) const { // For nodes before or at the root, check that the move is a // repetition rather than a move to the current position. 
- // In the cuckoo table, both moves Rc1c5 and Rc5c1 are stored in - // the same location, so we have to select which square to check. - if (color_of(piece_on(empty(s1) ? s2 : s1)) != side_to_move()) - continue; - - // For repetitions before or at the root, require one more if (stp->repetition) return true; } diff --git a/src/position.h b/src/position.h index 154ed652..3cfb87d0 100644 --- a/src/position.h +++ b/src/position.h @@ -156,7 +156,7 @@ class Position { int game_ply() const; bool is_chess960() const; bool is_draw(int ply) const; - bool has_game_cycle(int ply) const; + bool upcoming_repetition(int ply) const; bool has_repeated() const; int rule50_count() const; Value non_pawn_material(Color c) const; diff --git a/src/search.cpp b/src/search.cpp index 188e81f4..6368acc6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -534,9 +534,8 @@ Value Search::Worker::search( // Limit the depth if extensions made it too large depth = std::min(depth, MAX_PLY - 1); - // Check if we have an upcoming move that draws by repetition, or - // if the opponent had an alternative move earlier to this position. - if (!rootNode && alpha < VALUE_DRAW && pos.has_game_cycle(ss->ply)) + // Check if we have an upcoming move that draws by repetition. + if (!rootNode && alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply)) { alpha = value_draw(this->nodes); if (alpha >= beta) @@ -1447,9 +1446,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, assert(PvNode || (alpha == beta - 1)); assert(depth <= 0); - // Check if we have an upcoming move that draws by repetition, or if - // the opponent had an alternative move earlier to this position. (~1 Elo) - if (alpha < VALUE_DRAW && pos.has_game_cycle(ss->ply)) + // Check if we have an upcoming move that draws by repetition. 
(~1 Elo) + if (alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply)) { alpha = value_draw(this->nodes); if (alpha >= beta) From ad0f1fecda6987b16e34807a5ebc3947ced9a866 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Tue, 2 Jul 2024 14:18:04 +0200 Subject: [PATCH 151/315] Move info strings once more Follow up from #5404 ... current location leads to troubles with Aquarium GUI Fixes #5430 Now prints the information on threads and available processors at the beginning of search, where info about the networks is already printed (and is known to work) closes https://github.com/official-stockfish/Stockfish/pull/5433 No functional change. --- src/engine.cpp | 16 ++++++++++------ src/uci.cpp | 9 +++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/engine.cpp b/src/engine.cpp index 233f6270..2bc0db6a 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -297,16 +297,20 @@ std::string Engine::get_numa_config_as_string() const { std::string Engine::numa_config_information_as_string() const { auto cfgStr = get_numa_config_as_string(); - return "Available Processors: " + cfgStr; + return "Available processors: " + cfgStr; } std::string Engine::thread_binding_information_as_string() const { - auto boundThreadsByNode = get_bound_thread_count_by_numa_node(); - if (boundThreadsByNode.empty()) - return ""; - + auto boundThreadsByNode = get_bound_thread_count_by_numa_node(); std::stringstream ss; - ss << "NUMA Node Thread Binding: "; + + size_t threadsSize = threads.size(); + ss << "Using " << threadsSize << (threadsSize > 1 ? 
" threads" : " thread"); + + if (boundThreadsByNode.empty()) + return ss.str(); + + ss << " with NUMA node thread binding: "; bool isFirst = true; diff --git a/src/uci.cpp b/src/uci.cpp index 3c9177ee..9b60680d 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -109,16 +109,17 @@ void UCIEngine::loop() { << engine.get_options() << sync_endl; sync_cout << "uciok" << sync_endl; - - // keep info strings after uciok for old GUIs - print_info_string(engine.numa_config_information_as_string()); - print_info_string(engine.thread_binding_information_as_string()); } else if (token == "setoption") setoption(is); else if (token == "go") + { + // send info strings after the go command is sent for old GUIs and python-chess + print_info_string(engine.numa_config_information_as_string()); + print_info_string(engine.thread_binding_information_as_string()); go(is); + } else if (token == "position") position(is); else if (token == "ucinewgame") From b9ff5bb93be410b418d6812d6753e64cf216057a Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Tue, 2 Jul 2024 15:06:37 -0700 Subject: [PATCH 152/315] Implement dbg_extremes_of An alternative to #5431, implements one function `dbg_extremes_of` to keep track of min and max. 
closes https://github.com/official-stockfish/Stockfish/pull/5434 No functional change --- src/misc.cpp | 35 +++++++++++++++++++++++++++++++---- src/misc.h | 2 ++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index 26dd3a28..b68c12b9 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -284,10 +284,18 @@ struct DebugInfo { constexpr std::atomic& operator[](int index) { return data[index]; } }; -DebugInfo<2> hit[MaxDebugSlots]; -DebugInfo<2> mean[MaxDebugSlots]; -DebugInfo<3> stdev[MaxDebugSlots]; -DebugInfo<6> correl[MaxDebugSlots]; +struct DebugExtremes: public DebugInfo<3> { + DebugExtremes() { + data[1] = std::numeric_limits::min(); + data[2] = std::numeric_limits::max(); + } +}; + +DebugInfo<2> hit[MaxDebugSlots]; +DebugInfo<2> mean[MaxDebugSlots]; +DebugInfo<3> stdev[MaxDebugSlots]; +DebugInfo<6> correl[MaxDebugSlots]; +DebugExtremes extremes[MaxDebugSlots]; } // namespace @@ -311,6 +319,18 @@ void dbg_stdev_of(int64_t value, int slot) { stdev[slot][2] += value * value; } +void dbg_extremes_of(int64_t value, int slot) { + ++extremes[slot][0]; + + int64_t current_max = extremes[slot][1].load(); + while (current_max < value && !extremes[slot][1].compare_exchange_weak(current_max, value)) + {} + + int64_t current_min = extremes[slot][2].load(); + while (current_min > value && !extremes[slot][2].compare_exchange_weak(current_min, value)) + {} +} + void dbg_correl_of(int64_t value1, int64_t value2, int slot) { ++correl[slot][0]; @@ -345,6 +365,13 @@ void dbg_print() { std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl; } + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = extremes[i][0])) + { + std::cerr << "Extremity #" << i << ": Total " << n << " Min " << extremes[i][2] + << " Max " << extremes[i][1] << std::endl; + } + for (int i = 0; i < MaxDebugSlots; ++i) if ((n = correl[i][0])) { diff --git a/src/misc.h b/src/misc.h index bdc7c864..0184ab88 100644 --- a/src/misc.h +++ b/src/misc.h @@ -67,6 
+67,8 @@ std::optional read_file_to_string(const std::string& path); void dbg_hit_on(bool cond, int slot = 0); void dbg_mean_of(int64_t value, int slot = 0); void dbg_stdev_of(int64_t value, int slot = 0); +void dbg_extremes_of(int64_t value, int slot); + void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0); void dbg_print(); From ee6fc7e38b4aeef44862159215a56d97122f59a0 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 3 Jul 2024 11:14:41 +0200 Subject: [PATCH 153/315] CI: limit artifact uploads do not upload some unneeded intermediate directories, disable running authenticated git commands with the checkout action. Thanks to Yaron A for the report. closes https://github.com/official-stockfish/Stockfish/pull/5435 No functional change --- .github/workflows/arm_compilation.yml | 6 +++++- .github/workflows/clang-format.yml | 1 + .github/workflows/codeql.yml | 2 ++ .github/workflows/compilation.yml | 7 ++++++- .github/workflows/games.yml | 2 ++ .github/workflows/iwyu.yml | 2 ++ .github/workflows/matetrack.yml | 2 ++ .github/workflows/sanitizers.yml | 2 ++ .github/workflows/stockfish.yml | 4 ++++ .github/workflows/tests.yml | 1 + .github/workflows/upload_binaries.yml | 2 ++ 11 files changed, 29 insertions(+), 2 deletions(-) diff --git a/.github/workflows/arm_compilation.yml b/.github/workflows/arm_compilation.yml index 3934ac2d..5bf2a93e 100644 --- a/.github/workflows/arm_compilation.yml +++ b/.github/workflows/arm_compilation.yml @@ -26,6 +26,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + persist-credentials: false - name: Download required linux packages if: runner.os == 'Linux' @@ -91,4 +92,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} - path: . + path: | + . 
+ !.git + !.output diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml index 630edbf9..637cfc0d 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -19,6 +19,7 @@ jobs: - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} + persist-credentials: false - name: Run clang-format style check uses: jidicula/clang-format-action@f62da5e3d3a2d88ff364771d9d938773a618ab5e # @v4.11.0 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index d949a5a7..d01ed41f 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -30,6 +30,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + with: + persist-credentials: false # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index 3524d5e9..5878adec 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -25,6 +25,8 @@ jobs: shell: ${{ matrix.config.shell }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Install fixed GCC on Linux if: runner.os == 'Linux' @@ -86,4 +88,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} - path: . + path: | + . 
+ !.git + !.output diff --git a/.github/workflows/games.yml b/.github/workflows/games.yml index 088695e5..f0bca442 100644 --- a/.github/workflows/games.yml +++ b/.github/workflows/games.yml @@ -13,6 +13,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} path: Stockfish + persist-credentials: false - name: build debug enabled version of SF working-directory: Stockfish/src @@ -24,6 +25,7 @@ jobs: repository: Disservin/fast-chess path: fast-chess ref: d54af1910d5479c669dc731f1f54f9108a251951 + persist-credentials: false - name: fast-chess build working-directory: fast-chess diff --git a/.github/workflows/iwyu.yml b/.github/workflows/iwyu.yml index 0552a598..f8898b1c 100644 --- a/.github/workflows/iwyu.yml +++ b/.github/workflows/iwyu.yml @@ -14,6 +14,7 @@ jobs: uses: actions/checkout@v4 with: path: Stockfish + persist-credentials: false - name: Checkout include-what-you-use uses: actions/checkout@v4 @@ -21,6 +22,7 @@ jobs: repository: include-what-you-use/include-what-you-use ref: f25caa280dc3277c4086ec345ad279a2463fea0f path: include-what-you-use + persist-credentials: false - name: Download required linux packages run: | diff --git a/.github/workflows/matetrack.yml b/.github/workflows/matetrack.yml index dd81f334..de65209f 100644 --- a/.github/workflows/matetrack.yml +++ b/.github/workflows/matetrack.yml @@ -13,6 +13,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} path: Stockfish + persist-credentials: false - name: build SF working-directory: Stockfish/src @@ -24,6 +25,7 @@ jobs: repository: vondele/matetrack path: matetrack ref: 20287a1a145f30a166b7ef251eddb611e4e44fbf + persist-credentials: false - name: matetrack install deps working-directory: matetrack diff --git a/.github/workflows/sanitizers.yml b/.github/workflows/sanitizers.yml index b75c06cf..55459292 100644 --- a/.github/workflows/sanitizers.yml +++ b/.github/workflows/sanitizers.yml @@ -40,6 +40,8 @@ jobs: shell: ${{ matrix.config.shell }} steps: - uses: actions/checkout@v4 
+ with: + persist-credentials: false - name: Download required linux packages run: | diff --git a/.github/workflows/stockfish.yml b/.github/workflows/stockfish.yml index 8a1094fb..5589c762 100644 --- a/.github/workflows/stockfish.yml +++ b/.github/workflows/stockfish.yml @@ -17,6 +17,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false # returns null if no pre-release exists - name: Get Commit SHA of Latest Pre-release @@ -66,6 +68,8 @@ jobs: arm_matrix: ${{ steps.set-arm-matrix.outputs.arm_matrix }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - id: set-matrix run: | TASKS=$(echo $(cat .github/ci/matrix.json) ) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 328c9cf9..836555e6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -106,6 +106,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + persist-credentials: false - name: Download required linux packages if: runner.os == 'Linux' diff --git a/.github/workflows/upload_binaries.yml b/.github/workflows/upload_binaries.yml index acf91a8f..c91824a2 100644 --- a/.github/workflows/upload_binaries.yml +++ b/.github/workflows/upload_binaries.yml @@ -25,6 +25,8 @@ jobs: shell: ${{ matrix.config.shell }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Download artifact from compilation uses: actions/download-artifact@v4 From 74a8fc060465a822f0c047f908d5fb07ebc6ad96 Mon Sep 17 00:00:00 2001 From: Disservin Date: Wed, 3 Jul 2024 14:07:48 +0200 Subject: [PATCH 154/315] Use explicit action permissions in CI Necessary modifications according to changes in the GitHub Action settings. closes https://github.com/official-stockfish/Stockfish/pull/5437 Follow up from the report by Yaron Avital (yaronav) earlier. 
No functional change --- .github/workflows/stockfish.yml | 10 ++++++++++ .github/workflows/upload_binaries.yml | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/.github/workflows/stockfish.yml b/.github/workflows/stockfish.yml index 5589c762..1f87e061 100644 --- a/.github/workflows/stockfish.yml +++ b/.github/workflows/stockfish.yml @@ -15,6 +15,8 @@ jobs: Prerelease: if: github.repository == 'official-stockfish/Stockfish' && (github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag')) runs-on: ubuntu-latest + permissions: + contents: write # For deleting/creating a prerelease steps: - uses: actions/checkout@v4 with: @@ -104,9 +106,17 @@ jobs: uses: ./.github/workflows/upload_binaries.yml with: matrix: ${{ needs.Matrix.outputs.matrix }} + permissions: + contents: write # For deleting/creating a (pre)release + secrets: + token: ${{ secrets.GITHUB_TOKEN }} ARM_Binaries: if: github.repository == 'official-stockfish/Stockfish' needs: [Matrix, Prerelease, ARMCompilation] uses: ./.github/workflows/upload_binaries.yml with: matrix: ${{ needs.Matrix.outputs.arm_matrix }} + permissions: + contents: write # For deleting/creating a (pre)release + secrets: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/upload_binaries.yml b/.github/workflows/upload_binaries.yml index c91824a2..c5a2cd10 100644 --- a/.github/workflows/upload_binaries.yml +++ b/.github/workflows/upload_binaries.yml @@ -5,6 +5,9 @@ on: matrix: type: string required: true + secrets: + token: + required: true jobs: Artifacts: @@ -80,6 +83,7 @@ jobs: uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981 with: files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} + token: ${{ secrets.token }} - name: Get last commit sha id: last_commit @@ -106,3 +110,4 @@ jobs: tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} prerelease: true files: stockfish-${{ 
matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} + token: ${{ secrets.token }} From 25361e514bffb81284d4311601a9f7a4a7ced79b Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 3 Jul 2024 17:39:55 +0200 Subject: [PATCH 155/315] Output from a fixed depth onward, instead of 3s. To avoid output that depends on timing, output currmove and similar only from depth > 30 onward. Current choice of 3s makes the output of the same search depend on the system load, and doesn't always start at move 1. Depth 30 is nowadays reached in a few seconds on most systems. closes https://github.com/official-stockfish/Stockfish/pull/5436 No functional change --- src/search.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 6368acc6..5eda1217 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -349,10 +349,10 @@ void Search::Worker::iterative_deepening() { if (threads.stop) break; - // When failing high/low give some update (without cluttering - // the UI) before a re-search. + // When failing high/low give some update before a re-search. + // To avoid excessive output, only start at rootDepth > 30. if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta) - && elapsed_time() > 3000) + && rootDepth > 30) main_manager()->pv(*this, threads, tt, rootDepth); // In case of failing low/high increase aspiration window and @@ -383,7 +383,7 @@ void Search::Worker::iterative_deepening() { std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); if (mainThread - && (threads.stop || pvIdx + 1 == multiPV || elapsed_time() > 3000) + && (threads.stop || pvIdx + 1 == multiPV || rootDepth > 30) // A thread that aborted search can have mated-in/TB-loss PV and score // that cannot be trusted, i.e. it can be delayed or refuted if we would have // had time to fully search other root-moves.
Thus we suppress this output and @@ -974,7 +974,7 @@ moves_loop: // When in check, search starts here ss->moveCount = ++moveCount; - if (rootNode && is_mainthread() && elapsed_time() > 3000) + if (rootNode && is_mainthread() && rootDepth > 30) { main_manager()->updates.onIter( {depth, UCIEngine::move(move, pos.is_chess960()), moveCount + thisThread->pvIdx}); From 3c379e55d9d92a5704632c6255e72892a4db9a2f Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Mon, 1 Jul 2024 16:26:44 -0400 Subject: [PATCH 156/315] Update 7 stat bonus/malus params Values found around 119k / 120k spsa games at 60+0.6: https://tests.stockfishchess.org/tests/view/6683112a192114e61f92f87a Passed STC: https://tests.stockfishchess.org/tests/view/66838148c4f539faa0326897 LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 40928 W: 10835 L: 10508 D: 19585 Ptnml(0-2): 139, 4802, 10254, 5131, 138 Passed LTC: https://tests.stockfishchess.org/tests/view/668448a87a1863935cee42c6 LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 29208 W: 7559 L: 7253 D: 14396 Ptnml(0-2): 17, 3118, 8019, 3442, 8 closes https://github.com/official-stockfish/Stockfish/pull/5439 bench 1138753 --- src/search.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5eda1217..f74d4f87 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -79,10 +79,10 @@ Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::clamp(186 * d - 285, 20, 1524); } +int stat_bonus(Depth d) { return std::clamp(191 * d - 285, 20, 1412); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return (d < 4 ? 707 * d - 260 : 2073); } +int stat_malus(Depth d) { return (d < 4 ? 
727 * d - 260 : 1908); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } @@ -1380,11 +1380,9 @@ moves_loop: // When in check, search starts here + 64 * (!ss->inCheck && bestValue <= ss->staticEval - 107) + 147 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 75)); - - // proportional to "how much damage we have to undo" - if ((ss - 1)->statScore < -8000) - bonus += std::clamp(-(ss - 1)->statScore / 100, 0, 250); - + // Proportional to "how much damage we have to undo" + if ((ss - 1)->statScore < -7850) + bonus += std::clamp(-(ss - 1)->statScore / 100, 0, 224); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus / 100); @@ -1801,7 +1799,7 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - int bestMoveBonus = bestValue > beta + 164 ? quietMoveBonus // larger bonus + int bestMoveBonus = bestValue > beta + 166 ? quietMoveBonus // larger bonus : stat_bonus(depth); // smaller bonus update_quiet_stats(pos, ss, workerThread, bestMove, bestMoveBonus); From 2cbc20e846e46da8bfc8e254a7703a0bfad3b850 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Tue, 25 Jun 2024 16:54:25 +0200 Subject: [PATCH 157/315] Correct and extend PV lines with decisive TB score Currently (after #5407), SF has the property that any PV line with a decisive TB score contains the corresponding TB position, with a score that correctly identifies the depth at which TB are entered. The PV line that follows might not preserve the game outcome, but can easily be verified and extended based on TB information. This patch provides this functionality, simply extending the PV lines on output, this doesn't affect search. Indeed, if DTZ tables are available, search based PV lines that correspond to decisive TB scores are verified to preserve game outcome, truncating the line as needed. 
Subsequently, such PV lines are extended with a game outcome preserving line until mate, as a possible continuation. These lines are not optimal mating lines, but are similar to what a user could produce on a website like https://syzygy-tables.info/ clicking always the top ranked move, i.e. minimizing or maximizing DTZ (with a simple tie-breaker for moves that have identical DTZ), and are thus just an illustration of how the game can be won. A similar approach is already established in https://github.com/joergoster/Stockfish/tree/matefish2 This also contributes to addressing #5175 where SF can give an incorrect TB win/loss for positions in TB with a movecounter that doesn't reflect optimal play. While the full solution requires either TB generated differently, or a search when ranking rootmoves, current SF will eventually find a draw in these cases, in practice quite quickly, e.g. `1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1` `8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106` Gives the same results as master on an extended set of test positions from https://github.com/mcostalba/Stockfish/commit/9173d29c414ddb8f4bec74e4db3ccbe664c66bf9 with the exception of the above mentioned fen where this commit improves. With https://github.com/vondele/matetrack using 6men TB, all generated PVs verify: ``` Using ../Stockfish/src/stockfish.syzygyExtend on matetrack.epd with --nodes 1000000 --syzygyPath /chess/syzygy/3-4-5-6/WDL:/chess/syzygy/3-4-5-6/DTZ Engine ID: Stockfish dev-20240704-ff227954 Total FENs: 6555 Found mates: 3299 Best mates: 2582 Found TB wins: 568 ``` As repeated DTZ probing could be a slow procedure (100ms+ on HDD, a few ms on SSD), the extension is only done as long as the time taken is less than half the `Move Overhead` parameter. For tournaments where these lines might be of interest to the user, a suitable `Move Overhead` might be needed (e.g. TCEC has 1000ms already).
closes https://github.com/official-stockfish/Stockfish/pull/5414 No functional change --- src/search.cpp | 177 +++++++++++++++++++++++++++++++++++++---- src/search.h | 4 +- src/syzygy/tbprobe.cpp | 29 ++++--- src/syzygy/tbprobe.h | 7 +- 4 files changed, 186 insertions(+), 31 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index f74d4f87..023e5113 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -28,6 +29,9 @@ #include #include #include +#include +#include +#include #include "evaluate.h" #include "misc.h" @@ -50,6 +54,12 @@ namespace Stockfish { namespace TB = Tablebases; +void syzygy_extend_pv(const OptionsMap& options, + const Search::LimitsType& limits, + Stockfish::Position& pos, + Stockfish::Search::RootMove& rootMove, + Value& v); + using Eval::evaluate; using namespace Search; @@ -1955,18 +1965,145 @@ void SearchManager::check_time(Search::Worker& worker) { worker.threads.stop = worker.threads.abortedSearch = true; } -void SearchManager::pv(const Search::Worker& worker, +// Used to correct and extend PVs for moves that have a TB (but not a mate) score. +// Keeps the search based PV for as long as it is verified to maintain the game outcome, truncates afterwards. +// Finally, extends to mate the PV, providing a possible continuation (but not a proven mating line). +void syzygy_extend_pv(const OptionsMap& options, + const Search::LimitsType& limits, + Position& pos, + RootMove& rootMove, + Value& v) { + + auto t_start = std::chrono::steady_clock::now(); + int moveOverhead = int(options["Move Overhead"]); + + // Do not use more than moveOverhead / 2 time, if time management is active. 
+ auto time_abort = [&t_start, &moveOverhead, &limits]() -> bool { + auto t_end = std::chrono::steady_clock::now(); + return limits.use_time_management() + && 2 * std::chrono::duration(t_end - t_start).count() + > moveOverhead; + }; + + std::list sts; + + // Step 1, walk the PV to the last position in TB with correct decisive score + int ply = 0; + while (size_t(ply) < rootMove.pv.size()) + { + Move& pvMove = rootMove.pv[ply]; + + RootMoves legalMoves; + for (const auto& m : MoveList(pos)) + legalMoves.emplace_back(m); + + Tablebases::Config config = Tablebases::rank_root_moves(options, pos, legalMoves); + RootMove& rm = *std::find(legalMoves.begin(), legalMoves.end(), pvMove); + + if (legalMoves[0].tbRank != rm.tbRank) + break; + + ply++; + + auto& st = sts.emplace_back(); + pos.do_move(pvMove, st); + + // don't allow for repetitions or drawing moves along the PV in TB regime. + if (config.rootInTB && pos.is_draw(ply)) + { + pos.undo_move(pvMove); + ply--; + break; + } + + // Full PV shown will thus be validated and end TB. + // If we can't validate the full PV in time, we don't show it. + if (config.rootInTB && time_abort()) + break; + } + + // resize the PV to the correct part + rootMove.pv.resize(ply); + + // Step 2, now extend the PV to mate, as if the user explores syzygy-tables.info using + // top ranked moves (minimal DTZ), which gives optimal mates only for simple endgames e.g. KRvK + while (!pos.is_draw(0)) + { + if (time_abort()) + break; + + RootMoves legalMoves; + for (const auto& m : MoveList(pos)) + { + auto& rm = legalMoves.emplace_back(m); + StateInfo tmpSI; + pos.do_move(m, tmpSI); + // Give a score of each move to break DTZ ties + // restricting opponent mobility, but not giving the opponent a capture. + for (const auto& mOpp : MoveList(pos)) + rm.tbRank -= pos.capture(mOpp) ? 
100 : 1; + pos.undo_move(m); + } + + // Mate found + if (legalMoves.size() == 0) + break; + + // sort moves according to their above assigned rank, + // This will break ties for moves with equal DTZ in rank_root_moves. + std::stable_sort( + legalMoves.begin(), legalMoves.end(), + [](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; }); + + // The winning side tries to minimize DTZ, the losing side maximizes it. + Tablebases::Config config = Tablebases::rank_root_moves(options, pos, legalMoves, true); + + // If DTZ is not available we might not find a mate, so we bail out. + if (!config.rootInTB || config.cardinality > 0) + break; + + ply++; + + Move& pvMove = legalMoves[0].pv[0]; + rootMove.pv.push_back(pvMove); + auto& st = sts.emplace_back(); + pos.do_move(pvMove, st); + } + + // Finding a draw in this function is an exceptional case, that cannot happen during engine game play, + // since we have a winning score, and play correctly with TB support. + // However, it can be that a position is draw due to the 50 move rule if it has been been reached + // on the board with a non-optimal 50 move counter e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106 + // which TB with dtz counter rounding cannot always correctly rank. See also + // https://github.com/official-stockfish/Stockfish/issues/5175#issuecomment-2058893495 + // We adjust the score to match the found PV. Note that a TB loss score can be displayed + // if the engine did not find a drawing move yet, but eventually search will figure it out. + // E.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1 + if (pos.is_draw(0)) + v = VALUE_DRAW; + + // Undo the PV moves. + for (auto it = rootMove.pv.rbegin(); it != rootMove.pv.rend(); ++it) + pos.undo_move(*it); + + // Inform if we couldn't get a full extension in time. + if (time_abort()) + sync_cout + << "info string Syzygy based PV extension requires more time, increase Move Overhead as needed." 
+ << sync_endl; +} + +void SearchManager::pv(Search::Worker& worker, const ThreadPool& threads, const TranspositionTable& tt, - Depth depth) const { + Depth depth) { - const auto nodes = threads.nodes_searched(); - const auto& rootMoves = worker.rootMoves; - const auto& pos = worker.rootPos; - size_t pvIdx = worker.pvIdx; - TimePoint time = tm.elapsed_time() + 1; - size_t multiPV = std::min(size_t(worker.options["MultiPV"]), rootMoves.size()); - uint64_t tbHits = threads.tb_hits() + (worker.tbConfig.rootInTB ? rootMoves.size() : 0); + const auto nodes = threads.nodes_searched(); + auto& rootMoves = worker.rootMoves; + auto& pos = worker.rootPos; + size_t pvIdx = worker.pvIdx; + size_t multiPV = std::min(size_t(worker.options["MultiPV"]), rootMoves.size()); + uint64_t tbHits = threads.tb_hits() + (worker.tbConfig.rootInTB ? rootMoves.size() : 0); for (size_t i = 0; i < multiPV; ++i) { @@ -1984,6 +2121,13 @@ void SearchManager::pv(const Search::Worker& worker, bool tb = worker.tbConfig.rootInTB && std::abs(v) <= VALUE_TB; v = tb ? 
rootMoves[i].tbScore : v; + bool isExact = i != pvIdx || tb || !updated; // tablebase- and previous-scores are exact + + // Potentially correct and extend the PV, and in exceptional cases v + if (std::abs(v) >= VALUE_TB_WIN_IN_MAX_PLY && std::abs(v) < VALUE_MATE_IN_MAX_PLY + && ((!rootMoves[i].scoreLowerbound && !rootMoves[i].scoreUpperbound) || isExact)) + syzygy_extend_pv(worker.options, worker.limits, pos, rootMoves[i], v); + std::string pv; for (Move m : rootMoves[i].pv) pv += UCIEngine::move(m, pos.is_chess960()) + " "; @@ -2005,15 +2149,16 @@ void SearchManager::pv(const Search::Worker& worker, info.score = {v, pos}; info.wdl = wdl; - if (i == pvIdx && !tb && updated) // tablebase- and previous-scores are exact + if (!isExact) info.bound = bound; - info.timeMs = time; - info.nodes = nodes; - info.nps = nodes * 1000 / time; - info.tbHits = tbHits; - info.pv = pv; - info.hashfull = tt.hashfull(); + TimePoint time = tm.elapsed_time() + 1; + info.timeMs = time; + info.nodes = nodes; + info.nps = nodes * 1000 / time; + info.tbHits = tbHits; + info.pv = pv; + info.hashfull = tt.hashfull(); updates.onUpdateFull(info); } diff --git a/src/search.h b/src/search.h index d5210c2e..e8e33b1a 100644 --- a/src/search.h +++ b/src/search.h @@ -202,10 +202,10 @@ class SearchManager: public ISearchManager { void check_time(Search::Worker& worker) override; - void pv(const Search::Worker& worker, + void pv(Search::Worker& worker, const ThreadPool& threads, const TranspositionTable& tt, - Depth depth) const; + Depth depth); Stockfish::TimeManagement tm; double originalTimeAdjust; diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 722dc9d3..fc2a092a 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -66,7 +66,7 @@ namespace { constexpr int TBPIECES = 7; // Max number of supported pieces constexpr int MAX_DTZ = - 1 << 18; // Max DTZ supported, large enough to deal with the syzygy TB limit. 
+ 1 << 18; // Max DTZ supported times 2, large enough to deal with the syzygy TB limit. enum { BigEndian, @@ -1574,7 +1574,10 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) { // Use the DTZ tables to rank root moves. // // A return value false indicates that not all probes were successful. -bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50) { +bool Tablebases::root_probe(Position& pos, + Search::RootMoves& rootMoves, + bool rule50, + bool rankDTZ) { ProbeState result = OK; StateInfo st; @@ -1585,7 +1588,7 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool ru // Check whether a position was repeated since the last zeroing move. bool rep = pos.has_repeated(); - int dtz, bound = rule50 ? (MAX_DTZ - 100) : 1; + int dtz, bound = rule50 ? (MAX_DTZ / 2 - 100) : 1; // Probe and rank each move for (auto& m : rootMoves) @@ -1624,8 +1627,10 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool ru // Better moves are ranked higher. Certain wins are ranked equally. // Losing moves are ranked equally unless a 50-move draw is in sight. - int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ : MAX_DTZ - (dtz + cnt50)) - : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ : -MAX_DTZ + (-dtz + cnt50)) + int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ - (rankDTZ ? dtz : 0) + : MAX_DTZ / 2 - (dtz + cnt50)) + : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ - (rankDTZ ? dtz : 0) + : -MAX_DTZ / 2 + (-dtz + cnt50)) : 0; m.tbRank = r; @@ -1633,10 +1638,11 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool ru // 1 cp to cursed wins and let it grow to 49 cp as the positions gets // closer to a real win. m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1 - : r > 0 ? Value((std::max(3, r - (MAX_DTZ - 200)) * int(PawnValue)) / 200) - : r == 0 ? VALUE_DRAW - : r > -bound ? 
Value((std::min(-3, r + (MAX_DTZ - 200)) * int(PawnValue)) / 200) - : -VALUE_MATE + MAX_PLY + 1; + : r > 0 ? Value((std::max(3, r - (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200) + : r == 0 ? VALUE_DRAW + : r > -bound + ? Value((std::min(-3, r + (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200) + : -VALUE_MATE + MAX_PLY + 1; } return true; @@ -1683,7 +1689,8 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, boo Config Tablebases::rank_root_moves(const OptionsMap& options, Position& pos, - Search::RootMoves& rootMoves) { + Search::RootMoves& rootMoves, + bool rankDTZ) { Config config; if (rootMoves.empty()) @@ -1707,7 +1714,7 @@ Config Tablebases::rank_root_moves(const OptionsMap& options, if (config.cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) { // Rank moves using DTZ tables - config.rootInTB = root_probe(pos, rootMoves, options["Syzygy50MoveRule"]); + config.rootInTB = root_probe(pos, rootMoves, options["Syzygy50MoveRule"], rankDTZ); if (!config.rootInTB) { diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h index e10950f4..75a18585 100644 --- a/src/syzygy/tbprobe.h +++ b/src/syzygy/tbprobe.h @@ -66,9 +66,12 @@ extern int MaxCardinality; void init(const std::string& paths); WDLScore probe_wdl(Position& pos, ProbeState* result); int probe_dtz(Position& pos, ProbeState* result); -bool root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50); +bool root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50, bool rankDTZ); bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50); -Config rank_root_moves(const OptionsMap& options, Position& pos, Search::RootMoves& rootMoves); +Config rank_root_moves(const OptionsMap& options, + Position& pos, + Search::RootMoves& rootMoves, + bool rankDTZ = false); } // namespace Stockfish::Tablebases From c40dd26cbce89cf15055acac75800da6a9721307 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sat, 6 Jul 2024 17:31:54 +0200 
Subject: [PATCH 158/315] CI give credentials for the clang-format action following up from earlier changes closes https://github.com/official-stockfish/Stockfish/pull/5450 No functional change --- .github/workflows/clang-format.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml index 637cfc0d..630edbf9 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -19,7 +19,6 @@ jobs: - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - persist-credentials: false - name: Run clang-format style check uses: jidicula/clang-format-action@f62da5e3d3a2d88ff364771d9d938773a618ab5e # @v4.11.0 From d212e663bb00226f861f3046b36a5d8a3a127865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Sat, 6 Jul 2024 12:16:38 +0200 Subject: [PATCH 159/315] Introduce evaluation grain of 16 (and randomize) This patch uses an evaluation grain of 16 in order to get more cutoffs in the alpha-beta algorithm. For a discussion of the efficiency of alpha-beta related to changes in the number of discrete values of terminal nodes, see for instance section 9.1.2 of Judea Pearl's classical book "Heuristics" : https://mat.uab.cat/~alseda/MasterOpt/Judea_Pearl-Heuristics_Intelligent_Search_Strategies_for_Computer_Problem_Solving.pdf Moreover, we add a small (-1, +1) random component after the quantization to help the search exploration a little bit. This is similar in spirit to the (-1, +1) random component already present in the function value_draw() to make Stockfish more robust in draw evaluations.
passed STC: LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 220960 W: 57249 L: 56668 D: 107043 Ptnml(0-2): 499, 26017, 56882, 26568, 514 https://tests.stockfishchess.org/tests/view/668907fb7edfb6f233f999f8 passed LTC : LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 48966 W: 12574 L: 12233 D: 24159 Ptnml(0-2): 14, 5233, 13654, 5562, 20 https://tests.stockfishchess.org/tests/view/6689105659cb3228a47598bf closes https://github.com/official-stockfish/Stockfish/pull/5449 bench: 1336007 --- src/evaluate.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 4e895fd3..44890a36 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -85,6 +85,9 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, int material = 554 * pos.count() + pos.non_pawn_material(); v = (nnue * (73921 + material) + optimism * (8112 + material)) / 73260; + // Evaluation grain (to get more alpha-beta cuts) with randomization (for robustness) + v = (v / 16) * 16 - 1 + (pos.key() & 0x2); + // Damp down the evaluation linearly when shuffling v -= v * pos.rule50_count() / 212; From daa9e217ab59a090a6344738505edbdfcd09a700 Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Sat, 6 Jul 2024 10:43:35 +0800 Subject: [PATCH 160/315] VVLTC search tune Passed VVLTC 1st sprt: https://tests.stockfishchess.org/tests/view/6688af640c9d7c1ab33ed327 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 16050 W: 4200 L: 3959 D: 7891 Ptnml(0-2): 0, 1383, 5018, 1624, 0 Passed VVLTC 2nd sprt: https://tests.stockfishchess.org/tests/view/6688e8900c9d7c1ab33efd60 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 44044 W: 11303 L: 10999 D: 21742 Ptnml(0-2): 1, 3973, 13772, 4273, 3 closes https://github.com/official-stockfish/Stockfish/pull/5444 Bench: 992058 --- src/search.cpp | 92 +++++++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 023e5113..0863013e 100644 --- 
a/src/search.cpp +++ b/src/search.cpp @@ -70,8 +70,8 @@ static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { - Value futilityMult = 109 - 40 * noTtCutNode; - Value improvingDeduction = 59 * improving * futilityMult / 32; + Value futilityMult = 122 - 37 * noTtCutNode; + Value improvingDeduction = 58 * improving * futilityMult / 32; Value worseningDeduction = oppWorsening * futilityMult / 3; return futilityMult * d - improvingDeduction - worseningDeduction; @@ -84,15 +84,15 @@ constexpr int futility_move_count(bool improving, Depth depth) { // Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; - v += cv / 10; + v += cv * std::abs(cv) / 5073; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::clamp(191 * d - 285, 20, 1412); } +int stat_bonus(Depth d) { return std::clamp(190 * d - 298, 20, 1596); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return (d < 4 ? 727 * d - 260 : 1908); } +int stat_malus(Depth d) { return (d < 4 ? 
736 * d - 268 : 2044); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } @@ -324,12 +324,12 @@ void Search::Worker::iterative_deepening() { // Reset aspiration window starting size Value avg = rootMoves[pvIdx].averageScore; - delta = 9 + avg * avg / 10182; + delta = 9 + avg * avg / 10424; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore (~4 Elo) - optimism[us] = 127 * avg / (std::abs(avg) + 86); + optimism[us] = 125 * avg / (std::abs(avg) + 89); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail @@ -513,17 +513,17 @@ void Search::Worker::clear() { counterMoves.fill(Move::none()); mainHistory.fill(0); captureHistory.fill(-700); - pawnHistory.fill(-1193); + pawnHistory.fill(-1188); correctionHistory.fill(0); for (bool inCheck : {false, true}) for (StatsType c : {NoCaptures, Captures}) for (auto& to : continuationHistory[inCheck][c]) for (auto& h : to) - h->fill(-56); + h->fill(-58); for (size_t i = 1; i < reductions.size(); ++i) - reductions[i] = int((19.26 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + reductions[i] = int((18.62 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); refreshTable.clear(networks[numaAccessToken]); } @@ -759,7 +759,7 @@ Value Search::Worker::search( // Use static evaluation difference to improve quiet move ordering (~9 Elo) if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-10 * int((ss - 1)->staticEval + ss->staticEval), -1590, 1371) + 800; + int bonus = std::clamp(-10 * int((ss - 1)->staticEval + ss->staticEval), -1664, 1471) + 752; thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) 
thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] @@ -780,7 +780,7 @@ Value Search::Worker::search( // Step 7. Razoring (~1 Elo) // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. - if (eval < alpha - 512 - 293 * depth * depth) + if (eval < alpha - 494 - 290 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) @@ -791,22 +791,22 @@ Value Search::Worker::search( // The depth condition is important for mate finding. if (!ss->ttPv && depth < 13 && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - - (ss - 1)->statScore / 263 + - (ss - 1)->statScore / 260 >= beta && eval >= beta && (!ttData.move || ttCapture) && beta > VALUE_TB_LOSS_IN_MAX_PLY && eval < VALUE_TB_WIN_IN_MAX_PLY) return beta + (eval - beta) / 3; // Step 9. Null move search with verification search (~35 Elo) - if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 14369 - && eval >= beta && ss->staticEval >= beta - 21 * depth + 393 && !excludedMove + if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 14389 + && eval >= beta && ss->staticEval >= beta - 21 * depth + 390 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / 197, 6) + depth / 3 + 5; + Depth R = std::min(int(eval - beta) / 202, 6) + depth / 3 + 5; ss->currentMove = Move::null(); ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -852,13 +852,13 @@ Value Search::Worker::search( // For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, or // by 1 if there is a ttMove with an upper bound. 
- if (cutNode && depth >= 8 && (!ttData.move || ttData.bound == BOUND_UPPER)) + if (cutNode && depth >= 7 && (!ttData.move || ttData.bound == BOUND_UPPER)) depth -= 1 + !ttData.move; // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. - probCutBeta = beta + 177 - 57 * improving; + probCutBeta = beta + 184 - 53 * improving; if ( !PvNode && depth > 3 && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY @@ -937,7 +937,7 @@ Value Search::Worker::search( moves_loop: // When in check, search starts here // Step 12. A small Probcut idea, when we are in check (~4 Elo) - probCutBeta = beta + 388; + probCutBeta = beta + 390; if (ss->inCheck && (ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 && ttData.value >= probCutBeta && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) @@ -1011,7 +1011,7 @@ moves_loop: // When in check, search starts here // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) moveCountPruning = moveCount >= futility_move_count(improving, depth) - - (singularBound == BOUND_UPPER && singularValue < alpha - 50); + - (singularBound == BOUND_UPPER && singularValue < alpha - 51); // Reduced depth of the next LMR search int lmrDepth = newDepth - r; @@ -1025,15 +1025,15 @@ moves_loop: // When in check, search starts here // Futility pruning for captures (~2 Elo) if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { - Value futilityValue = ss->staticEval + 294 + 246 * lmrDepth + Value futilityValue = ss->staticEval + 285 + 251 * lmrDepth + PieceValue[capturedPiece] + captHist / 7; if (futilityValue <= alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - int seeHist = std::clamp(captHist / 32, -180 * depth, 163 * depth); - if (!pos.see_ge(move, -163 * depth - seeHist)) + int seeHist = std::clamp(captHist / 32, -182 * depth, 166 * 
depth); + if (!pos.see_ge(move, -168 * depth - seeHist)) continue; } else @@ -1044,15 +1044,15 @@ moves_loop: // When in check, search starts here + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) - if (lmrDepth < 6 && history < -3899 * depth) + if (lmrDepth < 6 && history < -4165 * depth) continue; history += 2 * thisThread->mainHistory[us][move.from_to()]; - lmrDepth += history / 4040; + lmrDepth += history / 3853; Value futilityValue = - ss->staticEval + (bestValue < ss->staticEval - 51 ? 135 : 56) + 140 * lmrDepth; + ss->staticEval + (bestValue < ss->staticEval - 51 ? 143 : 52) + 135 * lmrDepth; // Futility pruning: parent node (~13 Elo) if (!ss->inCheck && lmrDepth < 12 && futilityValue <= alpha) @@ -1089,11 +1089,11 @@ moves_loop: // When in check, search starts here // margins scale well. if (!rootNode && move == ttData.move && !excludedMove - && depth >= 4 - (thisThread->completedDepth > 35) + ss->ttPv + && depth >= 4 - (thisThread->completedDepth > 36) + ss->ttPv && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY && (ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 3) { - Value singularBeta = ttData.value - (52 + 80 * (ss->ttPv && !PvNode)) * depth / 64; + Value singularBeta = ttData.value - (54 + 76 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1104,13 +1104,13 @@ moves_loop: // When in check, search starts here if (value < singularBeta) { - int doubleMargin = 290 * PvNode - 200 * !ttCapture; - int tripleMargin = 107 + 247 * PvNode - 278 * !ttCapture + 99 * ss->ttPv; + int doubleMargin = 293 * PvNode - 195 * !ttCapture; + int tripleMargin = 107 + 259 * PvNode - 260 * !ttCapture + 98 * ss->ttPv; extension = 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin); - depth += ((!PvNode) && (depth < 18)); + depth += ((!PvNode) && (depth < 16)); } // Multi-cut pruning @@ -1140,7 +1140,7 @@ 
moves_loop: // When in check, search starts here else if (PvNode && move.to_sq() == prevSq && thisThread->captureHistory[movedPiece][move.to_sq()] [type_of(pos.piece_on(move.to_sq()))] - > 3922) + > 3994) extension = 1; } @@ -1197,10 +1197,10 @@ moves_loop: // When in check, search starts here ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - 4747; + + (*contHist[1])[movedPiece][move.to_sq()] - 4664; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / 11125; + r -= ss->statScore / 10898; // Step 17. Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1 + rootNode) @@ -1343,7 +1343,7 @@ moves_loop: // When in check, search starts here else { // Reduce other moves if we have found at least one score improvement (~2 Elo) - if (depth > 2 && depth < 13 && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) + if (depth > 2 && depth < 14 && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) depth -= 2; assert(depth > 0); @@ -1386,13 +1386,13 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (113 * (depth > 5) + 118 * (PvNode || cutNode) + 119 * ((ss - 1)->moveCount > 8) - + 64 * (!ss->inCheck && bestValue <= ss->staticEval - 107) - + 147 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 75)); + int bonus = (114 * (depth > 5) + 116 * (PvNode || cutNode) + 123 * ((ss - 1)->moveCount > 8) + + 64 * (!ss->inCheck && bestValue <= ss->staticEval - 108) + + 153 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 76)); // Proportional to "how much damage we have to undo" - if ((ss - 1)->statScore < -7850) - bonus += std::clamp(-(ss - 1)->statScore / 100, 0, 224); + if ((ss - 1)->statScore < -7865) + bonus += std::clamp(-(ss - 1)->statScore / 103, 0, 258); 
update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus / 100); @@ -1564,7 +1564,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, if (bestValue > alpha) alpha = bestValue; - futilityBase = ss->staticEval + 294; + futilityBase = ss->staticEval + 299; } const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -1636,11 +1636,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)] [move.to_sq()] - <= 4452) + <= 4643) continue; // Do not search moves with bad enough SEE values (~5 Elo) - if (!pos.see_ge(move, -74)) + if (!pos.see_ge(move, -83)) continue; } @@ -1706,7 +1706,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) const { int reductionScale = reductions[d] * reductions[mn]; - return (reductionScale + 1236 - delta * 746 / rootDelta) / 1024 + (!i && reductionScale > 1326); + return (reductionScale + 1274 - delta * 746 / rootDelta) / 1024 + (!i && reductionScale > 1293); } // elapsed() returns the time elapsed since the search started. If the @@ -1809,7 +1809,7 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - int bestMoveBonus = bestValue > beta + 166 ? quietMoveBonus // larger bonus + int bestMoveBonus = bestValue > beta + 172 ? quietMoveBonus // larger bonus : stat_bonus(depth); // smaller bonus update_quiet_stats(pos, ss, workerThread, bestMove, bestMoveBonus); @@ -1847,7 +1847,7 @@ void update_all_stats(const Position& pos, // by moves at ply -1, -2, -3, -4, and -6 with current move. 
void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - bonus = bonus * 51 / 64; + bonus = bonus * 52 / 64; for (int i : {1, 2, 3, 4, 6}) { From a45c2bc34ae03dd35402e6cf26d515bae1425517 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Mon, 1 Jul 2024 17:08:22 -0700 Subject: [PATCH 161/315] Simplify Away Countermove Heuristic Passed Non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 977824 W: 252072 L: 252888 D: 472864 Ptnml(0-2): 2518, 117120, 250560, 116088, 2626 https://tests.stockfishchess.org/tests/view/6683452d95b0d1e881e81541 Passed Non-regression LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 81048 W: 20630 L: 20470 D: 39948 Ptnml(0-2): 36, 8915, 22464, 9071, 38 https://tests.stockfishchess.org/tests/view/66886b7b0c9d7c1ab33ed281 closes https://github.com/official-stockfish/Stockfish/pull/5441 bench 1276784 --- src/movepick.cpp | 7 +------ src/movepick.h | 5 ----- src/search.cpp | 18 ++++-------------- src/search.h | 1 - 4 files changed, 5 insertions(+), 26 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 52e8c526..05f57ae7 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -91,7 +91,6 @@ MovePicker::MovePicker(const Position& p, const CapturePieceToHistory* cph, const PieceToHistory** ch, const PawnHistory* ph, - Move cm, const Move* killers) : pos(p), mainHistory(mh), @@ -99,7 +98,7 @@ MovePicker::MovePicker(const Position& p, continuationHistory(ch), pawnHistory(ph), ttMove(ttm), - refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, + refutations{{killers[0], 0}, {killers[1], 0}}, depth(d) { assert(d > 0); @@ -273,10 +272,6 @@ top: cur = std::begin(refutations); endMoves = std::end(refutations); - // If the countermove is the same as a killer, skip it - if (refutations[0] == refutations[2] || refutations[1] == refutations[2]) - --endMoves; - ++stage; [[fallthrough]]; diff --git a/src/movepick.h b/src/movepick.h index b81f76e1..8a2e0145 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ 
-118,10 +118,6 @@ enum StatsType { // see www.chessprogramming.org/Butterfly_Boards (~11 elo) using ButterflyHistory = Stats; -// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous -// move, see www.chessprogramming.org/Countermove_Heuristic -using CounterMoveHistory = Stats; - // CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] using CapturePieceToHistory = Stats; @@ -164,7 +160,6 @@ class MovePicker { const CapturePieceToHistory*, const PieceToHistory**, const PawnHistory*, - Move, const Move*); MovePicker(const Position&, Move, diff --git a/src/search.cpp b/src/search.cpp index 0863013e..cd29176e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -125,7 +125,7 @@ Value value_to_tt(Value v, int ply); Value value_from_tt(Value v, int ply, int r50c); void update_pv(Move* pv, Move move, const Move* childPv); void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); -void update_refutations(const Position& pos, Stack* ss, Search::Worker& workerThread, Move move); +void update_refutations(Stack* ss, Move move); void update_quiet_histories( const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); void update_quiet_stats( @@ -510,7 +510,6 @@ void Search::Worker::iterative_deepening() { } void Search::Worker::clear() { - counterMoves.fill(Move::none()); mainHistory.fill(0); captureHistory.fill(-700); pawnHistory.fill(-1188); @@ -950,11 +949,9 @@ moves_loop: // When in check, search starts here nullptr, (ss - 6)->continuationHistory}; - Move countermove = - prevSq != SQ_NONE ? 
thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] : Move::none(); MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory, - contHist, &thisThread->pawnHistory, countermove, ss->killers); + contHist, &thisThread->pawnHistory, ss->killers); value = bestValue; moveCountPruning = false; @@ -1860,7 +1857,7 @@ void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { } // Updates move sorting heuristics -void update_refutations(const Position& pos, Stack* ss, Search::Worker& workerThread, Move move) { +void update_refutations(Stack* ss, Move move) { // Update killers if (ss->killers[0] != move) @@ -1868,13 +1865,6 @@ void update_refutations(const Position& pos, Stack* ss, Search::Worker& workerTh ss->killers[1] = ss->killers[0]; ss->killers[0] = move; } - - // Update countermove history - if (((ss - 1)->currentMove).is_ok()) - { - Square prevSq = ((ss - 1)->currentMove).to_sq(); - workerThread.counterMoves[pos.piece_on(prevSq)][prevSq] = move; - } } void update_quiet_histories( @@ -1893,7 +1883,7 @@ void update_quiet_histories( void update_quiet_stats( const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { - update_refutations(pos, ss, workerThread, move); + update_refutations(ss, move); update_quiet_histories(pos, ss, workerThread, move, bonus); } diff --git a/src/search.h b/src/search.h index e8e33b1a..122cd549 100644 --- a/src/search.h +++ b/src/search.h @@ -247,7 +247,6 @@ class Worker { bool is_mainthread() const { return threadIdx == 0; } // Public because they need to be updatable by the stats - CounterMoveHistory counterMoves; ButterflyHistory mainHistory; CapturePieceToHistory captureHistory; ContinuationHistory continuationHistory[2][2]; From ec8288fe0d81be31084cf4609767466b04458ec7 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sat, 6 Jul 2024 14:26:31 +0300 Subject: [PATCH 162/315] Simplify away eval in TM Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 
40160 W: 10523 L: 10309 D: 19328 Ptnml(0-2): 129, 4543, 10524, 4753, 131 https://tests.stockfishchess.org/tests/view/6685ab8b99271ae49479dbe9 Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 195672 W: 49681 L: 49639 D: 96352 Ptnml(0-2): 112, 20976, 55597, 21060, 91 https://tests.stockfishchess.org/tests/view/6686f27a7092ade1206f7889 closes https://github.com/official-stockfish/Stockfish/pull/5445 No functional change --- src/search.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index cd29176e..2fd38e50 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -65,9 +65,6 @@ using namespace Search; namespace { -static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, - 0.942, 0.933, 0.890, 0.984, 0.941}; - // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { Value futilityMult = 122 - 37 * noTtCutNode; @@ -463,11 +460,10 @@ void Search::Worker::iterative_deepening() { timeReduction = lastBestMoveDepth + 8 < completedDepth ? 1.495 : 0.687; double reduction = (1.48 + mainThread->previousTimeReduction) / (2.17 * timeReduction); double bestMoveInstability = 1 + 1.88 * totBestMoveChanges / threads.size(); - int el = std::clamp((bestValue + 750) / 150, 0, 9); double recapture = limits.capSq == rootMoves[0].pv[0].to_sq() ? 
0.955 : 1.005; double totalTime = mainThread->tm.optimum() * fallingEval * reduction - * bestMoveInstability * EvalLevel[el] * recapture; + * bestMoveInstability * recapture; // Cap used time in case of a single legal move for a better viewer experience if (rootMoves.size() == 1) From 24ab46c5110d6f5c587f4929e23a13983babb759 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sat, 6 Jul 2024 04:45:37 -0700 Subject: [PATCH 163/315] Non-functional Fixes & Updates Fixes a missing default slot for dbg_extremes_of, removes an extra newline, and updates SE elo estimate from https://tests.stockfishchess.org/tests/view/664ebd1e928b1fb18de4e4b7 while we are at it. closes https://github.com/official-stockfish/Stockfish/pull/5446 No functional change --- src/misc.h | 3 +-- src/search.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/misc.h b/src/misc.h index 0184ab88..ce49a1f6 100644 --- a/src/misc.h +++ b/src/misc.h @@ -67,8 +67,7 @@ std::optional read_file_to_string(const std::string& path); void dbg_hit_on(bool cond, int slot = 0); void dbg_mean_of(int64_t value, int slot = 0); void dbg_stdev_of(int64_t value, int slot = 0); -void dbg_extremes_of(int64_t value, int slot); - +void dbg_extremes_of(int64_t value, int slot = 0); void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0); void dbg_print(); diff --git a/src/search.cpp b/src/search.cpp index 2fd38e50..2bdcd25a 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -462,8 +462,8 @@ void Search::Worker::iterative_deepening() { double bestMoveInstability = 1 + 1.88 * totBestMoveChanges / threads.size(); double recapture = limits.capSq == rootMoves[0].pv[0].to_sq() ? 
0.955 : 1.005; - double totalTime = mainThread->tm.optimum() * fallingEval * reduction - * bestMoveInstability * recapture; + double totalTime = + mainThread->tm.optimum() * fallingEval * reduction * bestMoveInstability * recapture; // Cap used time in case of a single legal move for a better viewer experience if (rootMoves.size() == 1) @@ -1068,8 +1068,8 @@ moves_loop: // When in check, search starts here // We take care to not overdo to avoid search getting stuck. if (ss->ply < thisThread->rootDepth * 2) { - // Singular extension search (~94 Elo). If all moves but one fail low on a - // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), + // Singular extension search (~76 Elo, ~170 nElo). If all moves but one fail + // low on a search of (alpha-s, beta-s), and just one fails high on (alpha, beta), // then that move is singular and should be extended. To verify this we do // a reduced search on the position excluding the ttMove and if the result // is lower than ttValue minus a margin, then we will extend the ttMove. From 55cb235d47afe8422cc781970ef69790387f42bc Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Sat, 6 Jul 2024 19:07:42 +0800 Subject: [PATCH 164/315] Simplify internal iterative reductions This is a revert of cc992e5. This patch is based on consistent observations that decreasing depth more in IIR generally has a bad scaling behaviour (good at STC, bad at longer time controls). 
Simplification STC: https://tests.stockfishchess.org/tests/view/6689266659cb3228a4759b26 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 96992 W: 24977 L: 24824 D: 47191 Ptnml(0-2): 251, 11497, 24851, 11642, 255 Simplification LTC: https://tests.stockfishchess.org/tests/view/668930ffe59d990b103f6ab1 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 35808 W: 9185 L: 8980 D: 17643 Ptnml(0-2): 25, 3776, 10101, 3973, 29 closes https://github.com/official-stockfish/Stockfish/pull/5447 Bench: 1097766 --- src/search.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 2bdcd25a..576e1f90 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -835,11 +835,8 @@ Value Search::Worker::search( // Step 10. Internal iterative reductions (~9 Elo) // For PV nodes without a ttMove, we decrease depth. - // Additionally, if the current position is found in the TT - // and the stored depth in the TT is greater than or equal to - // current search depth, we decrease search depth even further. if (PvNode && !ttData.move) - depth -= 3 + (ss->ttHit && ttData.depth >= depth); + depth -= 3; // Use qsearch if depth <= 0. if (depth <= 0) From 4d6e1225bd409c72a9b966c3008cf99a804c5026 Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Sat, 6 Jul 2024 19:08:28 +0800 Subject: [PATCH 165/315] Simplify ttPv reduction formula This is a revert of 2046c92. This patch is based on the fact that the ttPv reduction has proven non-linear scaling (as documented in the code, along with testing guidelines); however, the original patch had (erroneously) not been tested at VLTC or longer. 
Simplification STC: https://tests.stockfishchess.org/tests/view/6689266e59cb3228a4759b28 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 100320 W: 25913 L: 25763 D: 48644 Ptnml(0-2): 270, 11842, 25750, 12064, 234 Simplification LTC: https://tests.stockfishchess.org/tests/view/66893103e59d990b103f6ab3 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 57078 W: 14466 L: 14282 D: 28330 Ptnml(0-2): 34, 6214, 15851, 6414, 26 closes https://github.com/official-stockfish/Stockfish/pull/5448 Bench: 1124658 --- src/search.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 576e1f90..cb0340ec 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1158,8 +1158,7 @@ moves_loop: // When in check, search starts here // Decrease reduction if position is or has been on the PV (~7 Elo) if (ss->ttPv) - r -= 1 + (ttData.value > alpha) + (ttData.depth >= depth) - - (PvNode && ttData.value < alpha && ttData.depth >= depth); + r -= 1 + (ttData.value > alpha) + (ttData.depth >= depth); // Decrease reduction for PvNodes (~0 Elo on STC, ~2 Elo on LTC) if (PvNode) From b1f522930d58118a6035870fe7d02b3d82681ec8 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Thu, 4 Jul 2024 23:39:10 -0700 Subject: [PATCH 166/315] Simplify Away Move Count Pruning Adjustment Using Singular Search Result Passed Non-regression STC: LLR: 3.01 (-2.94,2.94) <-1.75,0.25> Total: 62688 W: 16319 L: 16121 D: 30248 Ptnml(0-2): 196, 7317, 16104, 7547, 180 https://tests.stockfishchess.org/tests/view/66879bf51b527f04dd477ff9 Passed Non-regression LTC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 116502 W: 29504 L: 29379 D: 57619 Ptnml(0-2): 66, 12881, 32226, 13018, 60 https://tests.stockfishchess.org/tests/view/6688629e0c9d7c1ab33ed030 closes https://github.com/official-stockfish/Stockfish/pull/5442 bench 1207930 --- src/search.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index cb0340ec..ffe6e04b 100644 --- 
a/src/search.cpp +++ b/src/search.cpp @@ -559,12 +559,11 @@ Value Search::Worker::search( Key posKey; Move move, excludedMove, bestMove; Depth extension, newDepth; - Value bestValue, value, eval, maxValue, probCutBeta, singularValue; + Value bestValue, value, eval, maxValue, probCutBeta; bool givesCheck, improving, priorCapture, opponentWorsening; bool capture, moveCountPruning, ttCapture; Piece movedPiece; int moveCount, captureCount, quietCount; - Bound singularBound; // Step 1. Initialize node Worker* thisThread = this; @@ -948,8 +947,6 @@ moves_loop: // When in check, search starts here value = bestValue; moveCountPruning = false; - singularValue = VALUE_INFINITE; - singularBound = BOUND_NONE; // Step 13. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. @@ -999,9 +996,7 @@ moves_loop: // When in check, search starts here if (!rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) - moveCountPruning = - moveCount >= futility_move_count(improving, depth) - - (singularBound == BOUND_UPPER && singularValue < alpha - 51); + moveCountPruning = moveCount >= futility_move_count(improving, depth); // Reduced depth of the next LMR search int lmrDepth = newDepth - r; @@ -1087,9 +1082,8 @@ moves_loop: // When in check, search starts here Depth singularDepth = newDepth / 2; ss->excludedMove = move; - value = singularValue = + value = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); - singularBound = singularValue >= singularBeta ? BOUND_LOWER : BOUND_UPPER; ss->excludedMove = Move::none(); if (value < singularBeta) From b79ac764ff1662b40d5480595bafb599b72512eb Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Mon, 1 Jul 2024 21:57:53 -0700 Subject: [PATCH 167/315] Simplify in-check condition for Probcut-in-check dont let your memes be dreams !? 
Passed Non-regression STC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 512000 W: 132193 L: 132497 D: 247310 Ptnml(0-2): 1600, 61170, 130704, 60986, 1540 https://tests.stockfishchess.org/tests/view/66838911c4f539faa03268a2 Passed Non-regression LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 380268 W: 95894 L: 96042 D: 188332 Ptnml(0-2): 193, 42861, 104180, 42701, 199 https://tests.stockfishchess.org/tests/view/6688d0550c9d7c1ab33ed5a8 closes https://github.com/official-stockfish/Stockfish/pull/5443 Bench: 1130282 --- src/search.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index ffe6e04b..1bce9daa 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -927,10 +927,9 @@ Value Search::Worker::search( moves_loop: // When in check, search starts here - // Step 12. A small Probcut idea, when we are in check (~4 Elo) + // Step 12. A small Probcut idea (~4 Elo) probCutBeta = beta + 390; - if (ss->inCheck && (ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 - && ttData.value >= probCutBeta && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY + if ((ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 && ttData.value >= probCutBeta && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) return probCutBeta; From 2d3ef434b4009fcc9e198b508f00957c2d05eb1e Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sat, 6 Jul 2024 03:44:28 -0700 Subject: [PATCH 168/315] Tweak LMR at Root Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 328192 W: 84751 L: 84014 D: 159427 Ptnml(0-2): 758, 38802, 84253, 39511, 772 https://tests.stockfishchess.org/tests/view/6689203959cb3228a4759a49 Passed LTC: LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 56748 W: 14494 L: 14136 D: 28118 Ptnml(0-2): 19, 6089, 15803, 6441, 22 https://tests.stockfishchess.org/tests/view/66892d76e59d990b103f6626 closes https://github.com/official-stockfish/Stockfish/pull/5452 Bench 1253593 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/search.cpp b/src/search.cpp index 1bce9daa..9d0b5627 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1185,7 +1185,7 @@ moves_loop: // When in check, search starts here r -= ss->statScore / 10898; // Step 17. Late moves reduction / extension (LMR, ~117 Elo) - if (depth >= 2 && moveCount > 1 + rootNode) + if (depth >= 2 && moveCount > 1 + (rootNode && depth < 10)) { // In general we want to cap the LMR depth search at newDepth, but when // reduction is negative, we allow this move a limited search extension From bb9b65408ffe0f71eb60760e05c5d599300173da Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sat, 6 Jul 2024 13:41:11 -0400 Subject: [PATCH 169/315] Simplify improving deduction in futility margin Passed non-regression STC: https://tests.stockfishchess.org/tests/view/668981d4df142e108ffc9bb4 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 312672 W: 80280 L: 80363 D: 152029 Ptnml(0-2): 729, 37198, 80529, 37187, 693 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/668988c6df142e108ffca042 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 126042 W: 31971 L: 31857 D: 62214 Ptnml(0-2): 50, 13988, 34832, 14100, 51 closes https://github.com/official-stockfish/Stockfish/pull/5454 bench 1100483 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 9d0b5627..153eba18 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -68,7 +68,7 @@ namespace { // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { Value futilityMult = 122 - 37 * noTtCutNode; - Value improvingDeduction = 58 * improving * futilityMult / 32; + Value improvingDeduction = improving * futilityMult * 2; Value worseningDeduction = oppWorsening * futilityMult / 3; return futilityMult * d - improvingDeduction - worseningDeduction; From 75c8cb2c2f7d687d3ba02eac2088860b625acd47 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sat, 6 Jul 2024 22:21:33 +0300 Subject: [PATCH 
170/315] Adjust usage of previous statscore in bonus assignments This patch adjusts usage of previous statscore for bonus assignments - allowing it for any statscore and clamping it to a wider range. Passed STC: https://tests.stockfishchess.org/tests/view/66892e76e59d990b103f6a91 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 431520 W: 111767 L: 110872 D: 208881 Ptnml(0-2): 1180, 51165, 110133, 52144, 1138 Passed LTC: https://tests.stockfishchess.org/tests/view/66897176e59d990b103f9605 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 143184 W: 36463 L: 35929 D: 70792 Ptnml(0-2): 55, 15540, 39863, 16084, 50 closes https://github.com/official-stockfish/Stockfish/pull/5455 bench 1330556 --- src/search.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 153eba18..9b0ea9df 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1373,8 +1373,7 @@ moves_loop: // When in check, search starts here + 153 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 76)); // Proportional to "how much damage we have to undo" - if ((ss - 1)->statScore < -7865) - bonus += std::clamp(-(ss - 1)->statScore / 103, 0, 258); + bonus += std::clamp(-(ss - 1)->statScore / 100, -50, 274); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus / 100); From 4e9fded5a63a2a72964f6d3518e2f66186662d05 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sat, 6 Jul 2024 16:04:07 -0400 Subject: [PATCH 171/315] Larger bonus when updating quiet stats Also removes unused arguments to update_all_stats to fix compiler warnings about unused parameters. 
Passed non-regression STC: https://tests.stockfishchess.org/tests/view/6689a79a0fdd852d63cf52e9 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 26496 W: 6901 L: 6669 D: 12926 Ptnml(0-2): 62, 3094, 6715, 3304, 73 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/6689a9960fdd852d63cf532d LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 41214 W: 10373 L: 10173 D: 20668 Ptnml(0-2): 11, 4491, 11412, 4673, 20 closes https://github.com/official-stockfish/Stockfish/pull/5456 bench 1169958 --- src/search.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 9b0ea9df..3f160047 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -131,8 +131,6 @@ void update_all_stats(const Position& pos, Stack* ss, Search::Worker& workerThread, Move bestMove, - Value bestValue, - Value beta, Square prevSq, Move* quietsSearched, int quietCount, @@ -1362,8 +1360,8 @@ moves_loop: // When in check, search starts here // If there is a move that produces search value greater than alpha we update the stats of searched moves else if (bestMove) - update_all_stats(pos, ss, *this, bestMove, bestValue, beta, prevSq, quietsSearched, - quietCount, capturesSearched, captureCount, depth); + update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, quietCount, + capturesSearched, captureCount, depth); // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) @@ -1772,8 +1770,6 @@ void update_all_stats(const Position& pos, Stack* ss, Search::Worker& workerThread, Move bestMove, - Value bestValue, - Value beta, Square prevSq, Move* quietsSearched, int quietCount, @@ -1790,10 +1786,7 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - int bestMoveBonus = bestValue > beta + 172 ? 
quietMoveBonus // larger bonus - : stat_bonus(depth); // smaller bonus - - update_quiet_stats(pos, ss, workerThread, bestMove, bestMoveBonus); + update_quiet_stats(pos, ss, workerThread, bestMove, quietMoveBonus); // Decrease stats for all non-best quiet moves for (int i = 0; i < quietCount; ++i) From cdb0b96e0725bb9aafc0ca9aecfebdae32eede8f Mon Sep 17 00:00:00 2001 From: MinetaS Date: Sun, 7 Jul 2024 08:27:43 +0900 Subject: [PATCH 172/315] Clean up refutations array in MovePicker This is a follow-up cleanup to a45c2bc34ae03dd35402e6cf26d515bae1425517. closes https://github.com/official-stockfish/Stockfish/pull/5458 No functional change --- src/movepick.cpp | 9 +++------ src/movepick.h | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 05f57ae7..f6f9f0dc 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -297,9 +297,8 @@ top: [[fallthrough]]; case GOOD_QUIET : - if (!skipQuiets && select([&]() { - return *cur != refutations[0] && *cur != refutations[1] && *cur != refutations[2]; - })) + if (!skipQuiets + && select([&]() { return *cur != refutations[0] && *cur != refutations[1]; })) { if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth)) return *(cur - 1); @@ -328,9 +327,7 @@ top: case BAD_QUIET : if (!skipQuiets) - return select([&]() { - return *cur != refutations[0] && *cur != refutations[1] && *cur != refutations[2]; - }); + return select([&]() { return *cur != refutations[0] && *cur != refutations[1]; }); return Move::none(); diff --git a/src/movepick.h b/src/movepick.h index 8a2e0145..2564f730 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -185,7 +185,7 @@ class MovePicker { const PieceToHistory** continuationHistory; const PawnHistory* pawnHistory; Move ttMove; - ExtMove refutations[3], *cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets; + ExtMove refutations[2], *cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets; int stage; int threshold; 
Depth depth; From 5752529cabb3270e055147709ff0847e4d59ec22 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sat, 6 Jul 2024 09:31:35 -0400 Subject: [PATCH 173/315] Update default main net to nn-74f1d263ae9a.nnue Created by setting output weights (256) and biases (8) of the previous main net nn-ddcfb9224cdb.nnue to values found around 12k / 120k spsa games at 120+1.2 This used modified fishtest dev workers to construct .nnue files from spsa params, then load them with EvalFile when running tests: https://github.com/linrock/fishtest/tree/spsa-file-modified-nnue/worker Inspired by researching loading spsa params from files: https://github.com/official-stockfish/fishtest/pull/1926 Scripts for modifying nnue files and preparing params: https://github.com/linrock/nnue-pytorch/tree/no-gpu-modify-nnue spsa params: weights: [-127, 127], c_end = 6 biases: [-8192, 8192], c_end = 64 Example of reading output weights and biases from the previous main net using nnue-pytorch and printing spsa params in a format compatible with fishtest: ``` import features from serialize import NNUEReader feature_set = features.get_feature_set_from_name("HalfKAv2_hm") with open("nn-ddcfb9224cdb.nnue", "rb") as f: model = NNUEReader(f, feature_set).model c_end_weights = 6 c_end_biases = 64 for i in range(8): for j in range(32): value = round(int(model.layer_stacks.output.weight[i, j] * 600 * 16) / 127) print(f"oW[{i}][{j}],{value},-127,127,{c_end_weights},0.0020") for i in range(8): value = int(model.layer_stacks.output.bias[i] * 600 * 16) print(f"oB[{i}],{value},-8192,8192,{c_end_biases},0.0020") ``` For more info on spsa tuning params in nets: https://github.com/official-stockfish/Stockfish/pull/5149 https://github.com/official-stockfish/Stockfish/pull/5254 Passed STC: https://tests.stockfishchess.org/tests/view/66894d64e59d990b103f8a37 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 32000 W: 8443 L: 8137 D: 15420 Ptnml(0-2): 80, 3627, 8309, 3875, 109 Passed LTC: 
https://tests.stockfishchess.org/tests/view/6689668ce59d990b103f8b8b LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 172176 W: 43822 L: 43225 D: 85129 Ptnml(0-2): 97, 18821, 47633, 19462, 75 closes https://github.com/official-stockfish/Stockfish/pull/5459 bench 1120091 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index bdef9ceb..047c4a56 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -33,7 +33,7 @@ namespace Eval { // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro or the location where this macro is defined, as it is used // in the Makefile/Fishtest. -#define EvalFileDefaultNameBig "nn-ddcfb9224cdb.nnue" +#define EvalFileDefaultNameBig "nn-74f1d263ae9a.nnue" #define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue" namespace NNUE { From 5d3517c601c64d026824251784dd44f0cbf14873 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sun, 7 Jul 2024 11:23:50 +0200 Subject: [PATCH 174/315] Fix output for GUI Fritz 19 can hang with the current way to provide output, i.e. too much output in a short time for a mate / depth 245 found quickly. fallout from 25361e514bffb81284d4311601a9f7a4a7ced79b closes https://github.com/official-stockfish/Stockfish/pull/5460 No functional change --- src/search.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 3f160047..d22761e3 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -355,9 +355,10 @@ void Search::Worker::iterative_deepening() { break; // When failing high/low give some update before a re-search. - // To avoid excessive output, only start at rootDepth > 30. 
+ // To avoid excessive output that could hang GUIs like Fritz 19, only start + // at nodes > 10M (rather than depth N, which can be reached quickly) if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta) - && rootDepth > 30) + && nodes > 10000000) main_manager()->pv(*this, threads, tt, rootDepth); // In case of failing low/high increase aspiration window and @@ -388,7 +389,7 @@ void Search::Worker::iterative_deepening() { std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); if (mainThread - && (threads.stop || pvIdx + 1 == multiPV || rootDepth > 30) + && (threads.stop || pvIdx + 1 == multiPV || nodes > 10000000) // A thread that aborted search can have mated-in/TB-loss PV and score // that cannot be trusted, i.e. it can be delayed or refuted if we would have // had time to fully search other root-moves. Thus we suppress this output and @@ -968,7 +969,7 @@ moves_loop: // When in check, search starts here ss->moveCount = ++moveCount; - if (rootNode && is_mainthread() && rootDepth > 30) + if (rootNode && is_mainthread() && nodes > 10000000) { main_manager()->updates.onIter( {depth, UCIEngine::move(move, pos.is_chess960()), moveCount + thisThread->pvIdx}); From eac2d080a3279358a79e35bfcecf016d01db97e4 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sun, 7 Jul 2024 22:25:10 +0300 Subject: [PATCH 175/315] Further simplify stat bonuses Based on recent simplification by linrock Since it completely removed any special bonuses based on values difference and made it flat stat_bonus(depth + 1) I got an idea that we might as well remove all (depth + 1) bonuses and make them usual depth bonuses. Also removes weird negative bonus for depth 1 as a side effect. 
Passed STC: https://tests.stockfishchess.org/tests/view/6689d817eca84f4d25863746 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 18080 W: 4789 L: 4552 D: 8739 Ptnml(0-2): 46, 1987, 4727, 2244, 36 Passed LTC: https://tests.stockfishchess.org/tests/view/6689daa4eca84f4d258639d7 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 109062 W: 27548 L: 27417 D: 54097 Ptnml(0-2): 58, 11983, 30293, 12164, 33 Passed direct LTC vs linrock patch: https://tests.stockfishchess.org/tests/view/668a46f8eca84f4d25866fe9 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 100002 W: 25351 L: 25209 D: 49442 Ptnml(0-2): 54, 11119, 27529, 11229, 70 closes https://github.com/official-stockfish/Stockfish/pull/5461 Bench 1175744 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index d22761e3..ac0e59b5 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -86,7 +86,7 @@ Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::clamp(190 * d - 298, 20, 1596); } +int stat_bonus(Depth d) { return std::min(190 * d - 108, 1596); } // History and stats update malus, based on depth int stat_malus(Depth d) { return (d < 4 ? 736 * d - 268 : 2044); } @@ -1782,7 +1782,7 @@ void update_all_stats(const Position& pos, Piece moved_piece = pos.moved_piece(bestMove); PieceType captured; - int quietMoveBonus = stat_bonus(depth + 1); + int quietMoveBonus = stat_bonus(depth); int quietMoveMalus = stat_malus(depth); if (!pos.capture_stage(bestMove)) From acd0a933ad5beacaafeb89025b2e60802e993e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Tue, 9 Jul 2024 00:48:40 +0200 Subject: [PATCH 176/315] Fix compilation on Apple MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always use the posix function posix_memalign() as aligned memory allocator on Apple computers. 
This should allow Stockfish to be compiled out of the box on all versions of Mac OS X. Patch tested on the following systems (apart from the CI) : • Mac OS 10.9.6 (arch x86-64-sse41-popcnt) with gcc-10 • Mac OS 10.13.6 (arch x86-64-bmi2) with gcc-10, gcc-14 and clang-11 • Mac OS 14.1.1 (arch apple-silicon) with clang-15 closes https://github.com/official-stockfish/Stockfish/pull/5462 No functional change --- src/memory.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/memory.cpp b/src/memory.cpp index 565b39b2..1769a661 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -68,12 +68,12 @@ namespace Stockfish { // does not guarantee the availability of aligned_alloc(). Memory allocated with // std_aligned_alloc() must be freed with std_aligned_free(). void* std_aligned_alloc(size_t alignment, size_t size) { - // Apple requires 10.15, which is enforced in the makefile -#if defined(_ISOC11_SOURCE) || defined(__APPLE__) +#if defined(_ISOC11_SOURCE) return aligned_alloc(alignment, size); #elif defined(POSIXALIGNEDALLOC) - void* mem; - return posix_memalign(&mem, alignment, size) ?
nullptr : mem; + void* mem = nullptr; + posix_memalign(&mem, alignment, size); + return mem; #elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) return _mm_malloc(size, alignment); #elif defined(_WIN32) From 4880ed4ad177f2cec50cfc784a6cbb65d31ff4ef Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sat, 6 Jul 2024 20:07:01 -0700 Subject: [PATCH 177/315] Simplify Probcut Malus Passed Non-regression STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 74880 W: 19261 L: 19083 D: 36536 Ptnml(0-2): 202, 8861, 19120, 9071, 186 https://tests.stockfishchess.org/tests/view/668a0966eca84f4d25864cba Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 263916 W: 66690 L: 66718 D: 130508 Ptnml(0-2): 125, 29348, 73040, 29320, 125 https://tests.stockfishchess.org/tests/view/668a17e3eca84f4d25864e91 closes https://github.com/official-stockfish/Stockfish/pull/5464 bench 1331408 --- src/search.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index ac0e59b5..3e6c56a6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -861,8 +861,6 @@ Value Search::Worker::search( assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta); MovePicker mp(pos, ttData.move, probCutBeta - ss->staticEval, &thisThread->captureHistory); - Move probcutCapturesSearched[32]; - int probcutCaptureCount = 0; Piece captured; while ((move = mp.next_move()) != Move::none()) @@ -900,25 +898,12 @@ Value Search::Worker::search( thisThread->captureHistory[movedPiece][move.to_sq()][type_of(captured)] << stat_bonus(depth - 2); - for (int i = 0; i < probcutCaptureCount; i++) - { - movedPiece = pos.moved_piece(probcutCapturesSearched[i]); - captured = pos.piece_on(probcutCapturesSearched[i].to_sq()); - - thisThread->captureHistory[movedPiece][probcutCapturesSearched[i].to_sq()] - [type_of(captured)] - << -stat_malus(depth - 3); - } - // Save ProbCut data into transposition table ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, 
depth - 3, move, unadjustedStaticEval, tt.generation()); return std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY ? value - (probCutBeta - beta) : value; } - - if (probcutCaptureCount < 32) - probcutCapturesSearched[probcutCaptureCount++] = move; } Eval::NNUE::hint_common_parent_position(pos, networks[numaAccessToken], refreshTable); From b209f14b1ee0cda8cbd7fa3a8349e65701d1869f Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sun, 7 Jul 2024 15:13:40 -0400 Subject: [PATCH 178/315] Update default main net to nn-e8bac1c07a5a.nnue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created by modifying L2 weights from the previous main net (nn-74f1d263ae9a.nnue) with params found by spsa around 9k / 120k games at 120+1.2. 370 spsa params - L2 weights in nn-74f1d263ae9a.nnue where |val| >= 50 A: 6000, alpha: 0.602, gamma: 0.101 weights: [-127, 127], c_end = 6 To print the spsa params with nnue-pytorch: ``` import features from serialize import NNUEReader feature_set = features.get_feature_set_from_name("HalfKAv2_hm") with open("nn-74f1d263ae9a.nnue", "rb") as f: model = NNUEReader(f, feature_set).model c_end = 6 for i in range(8): for j in range(32): for k in range(30): value = int(model.layer_stacks.l2.weight[32 * i + j, k] * 64) if abs(value) >= 50: print(f"twoW[{i}][{j}][{k}],{value},-127,127,{c_end},0.0020") ``` Among the 370 params, 229 weights were changed. 
avg change: 0.0961 ± 1.67 range: [-4, 3] The number of weights changed, grouped by layer stack index, shows more weights were modified in the lower piece count buckets: [54, 52, 29, 23, 22, 18, 14, 17] Found with the same method described in: https://github.com/official-stockfish/Stockfish/pull/5459 Passed STC: https://tests.stockfishchess.org/tests/view/668aec9a58083e5fd88239e7 LLR: 3.00 (-2.94,2.94) <0.00,2.00> Total: 52384 W: 13569 L: 13226 D: 25589 Ptnml(0-2): 127, 6141, 13335, 6440, 149 Passed LTC: https://tests.stockfishchess.org/tests/view/668af50658083e5fd8823a0b LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 46974 W: 12006 L: 11668 D: 23300 Ptnml(0-2): 25, 4992, 13121, 5318, 31 closes https://github.com/official-stockfish/Stockfish/pull/5466 bench 1300471 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index 047c4a56..4b5f447e 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -33,7 +33,7 @@ namespace Eval { // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro or the location where this macro is defined, as it is used // in the Makefile/Fishtest. -#define EvalFileDefaultNameBig "nn-74f1d263ae9a.nnue" +#define EvalFileDefaultNameBig "nn-e8bac1c07a5a.nnue" #define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue" namespace NNUE { From 362a77a3450335e1940020c080bf3b7b361c594a Mon Sep 17 00:00:00 2001 From: yl25946 Date: Tue, 9 Jul 2024 00:53:04 -0500 Subject: [PATCH 179/315] Move Loop Consistency in Probcut In probcut move loop, everything is enclosed within a large if statement. I've changed it to guard clauses to stay consistent with other move loops. 
closes https://github.com/official-stockfish/Stockfish/pull/5463 No functional change --- AUTHORS | 1 + src/search.cpp | 83 +++++++++++++++++++++++++++----------------------- 2 files changed, 46 insertions(+), 38 deletions(-) diff --git a/AUTHORS b/AUTHORS index 6eefb56d..6957682f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -129,6 +129,7 @@ Kojirion Krystian Kuzniarek (kuzkry) Leonardo Ljubičić (ICCF World Champion) Leonid Pechenik (lp--) +Li Ying (yl25946) Liam Keegan (lkeegan) Linmiao Xu (linrock) Linus Arver (listx) diff --git a/src/search.cpp b/src/search.cpp index 3e6c56a6..47c5dc88 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -864,47 +864,54 @@ Value Search::Worker::search( Piece captured; while ((move = mp.next_move()) != Move::none()) - if (move != excludedMove && pos.legal(move)) + { + assert(move.is_ok()); + + if (move == excludedMove) + continue; + + // Check for legality + if (!pos.legal(move)) + continue; + + assert(pos.capture_stage(move)); + + movedPiece = pos.moved_piece(move); + captured = pos.piece_on(move.to_sq()); + + + // Prefetch the TT entry for the resulting position + prefetch(tt.first_entry(pos.key_after(move))); + + ss->currentMove = move; + ss->continuationHistory = + &this->continuationHistory[ss->inCheck][true][pos.moved_piece(move)][move.to_sq()]; + + thisThread->nodes.fetch_add(1, std::memory_order_relaxed); + pos.do_move(move, st); + + // Perform a preliminary qsearch to verify that the move holds + value = -qsearch(pos, ss + 1, -probCutBeta, -probCutBeta + 1); + + // If the qsearch held, perform the regular search + if (value >= probCutBeta) + value = + -search(pos, ss + 1, -probCutBeta, -probCutBeta + 1, depth - 4, !cutNode); + + pos.undo_move(move); + + if (value >= probCutBeta) { - assert(pos.capture_stage(move)); + thisThread->captureHistory[movedPiece][move.to_sq()][type_of(captured)] + << stat_bonus(depth - 2); - movedPiece = pos.moved_piece(move); - captured = pos.piece_on(move.to_sq()); - - - // Prefetch the TT entry for 
the resulting position - prefetch(tt.first_entry(pos.key_after(move))); - - ss->currentMove = move; - ss->continuationHistory = - &this - ->continuationHistory[ss->inCheck][true][pos.moved_piece(move)][move.to_sq()]; - - thisThread->nodes.fetch_add(1, std::memory_order_relaxed); - pos.do_move(move, st); - - // Perform a preliminary qsearch to verify that the move holds - value = -qsearch(pos, ss + 1, -probCutBeta, -probCutBeta + 1); - - // If the qsearch held, perform the regular search - if (value >= probCutBeta) - value = -search(pos, ss + 1, -probCutBeta, -probCutBeta + 1, depth - 4, - !cutNode); - - pos.undo_move(move); - - if (value >= probCutBeta) - { - thisThread->captureHistory[movedPiece][move.to_sq()][type_of(captured)] - << stat_bonus(depth - 2); - - // Save ProbCut data into transposition table - ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, - depth - 3, move, unadjustedStaticEval, tt.generation()); - return std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY ? value - (probCutBeta - beta) - : value; - } + // Save ProbCut data into transposition table + ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, + depth - 3, move, unadjustedStaticEval, tt.generation()); + return std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY ? 
value - (probCutBeta - beta) + : value; } + } Eval::NNUE::hint_common_parent_position(pos, networks[numaAccessToken], refreshTable); } From 98a7bb4436f04505a17f37befab0207252e97897 Mon Sep 17 00:00:00 2001 From: Disservin Date: Wed, 10 Jul 2024 17:56:43 +0200 Subject: [PATCH 180/315] CI give correct permissions for the clang-format action closes https://github.com/official-stockfish/Stockfish/pull/5470 No functional change --- .github/workflows/clang-format.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml index 630edbf9..452c2f2a 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -11,6 +11,10 @@ on: paths: - "**.cpp" - "**.h" + +permissions: + pull-requests: write + jobs: Clang-Format: name: Clang-Format @@ -39,6 +43,7 @@ jobs: _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_ comment_tag: execution + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Comment on PR if: steps.clang-format.outcome != 'failure' @@ -49,3 +54,4 @@ jobs: create_if_not_exists: false comment_tag: execution mode: delete + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 7e72b37e4ce3351399bb0ac08686bd84cdc4fba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Wed, 10 Jul 2024 14:51:48 +0200 Subject: [PATCH 181/315] Clean up comments in code - Capitalize comments - Reformat multi-lines comments to equalize the widths of the lines - Try to keep the width of comments around 85 characters - Remove periods at the end of single-line comments closes https://github.com/official-stockfish/Stockfish/pull/5469 No functional change --- src/engine.h | 2 +- src/memory.cpp | 26 ++- src/memory.h | 27 +-- src/misc.cpp | 19 +- src/movepick.cpp | 8 +- src/nnue/nnue_feature_transformer.h | 40 +++-- src/numa.h | 203 +++++++++++---------- src/position.h | 4 +- src/search.cpp | 268 ++++++++++++++-------------- src/search.h | 9 +- 
src/thread.cpp | 31 ++-- src/types.h | 30 ++-- 12 files changed, 356 insertions(+), 311 deletions(-) diff --git a/src/engine.h b/src/engine.h index 0d6f0f2b..127f7d7c 100644 --- a/src/engine.h +++ b/src/engine.h @@ -49,7 +49,7 @@ class Engine { Engine(std::string path = ""); - // Can't be movable due to components holding backreferences to fields + // Cannot be movable due to components holding backreferences to fields Engine(const Engine&) = delete; Engine(Engine&&) = delete; Engine& operator=(const Engine&) = delete; diff --git a/src/memory.cpp b/src/memory.cpp index 1769a661..ae303c53 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -49,10 +49,12 @@ #include // std::cerr #include // std::endl #include + // The needed Windows API for processor groups could be missed from old Windows // versions, so instead of calling them directly (forcing the linker to resolve // the calls at compile time), try to load them at runtime. To do this we need // first to define the corresponding function pointers. + extern "C" { using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE); using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID); @@ -64,9 +66,10 @@ using AdjustTokenPrivileges_t = namespace Stockfish { -// Wrapper for systems where the c++17 implementation -// does not guarantee the availability of aligned_alloc(). Memory allocated with -// std_aligned_alloc() must be freed with std_aligned_free(). +// Wrappers for systems where the c++17 implementation does not guarantee the +// availability of aligned_alloc(). Memory allocated with std_aligned_alloc() +// must be freed with std_aligned_free(). + void* std_aligned_alloc(size_t alignment, size_t size) { #if defined(_ISOC11_SOURCE) return aligned_alloc(alignment, size); @@ -96,7 +99,8 @@ void std_aligned_free(void* ptr) { #endif } -// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages. 
+// aligned_large_pages_alloc() will return suitably aligned memory, +// if possible using large pages. #if defined(_WIN32) @@ -135,6 +139,7 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize return nullptr; // We need SeLockMemoryPrivilege, so try to enable it for the process + if (!OpenProcessToken_f( // OpenProcessToken() GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) return nullptr; @@ -149,8 +154,10 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize tp.Privileges[0].Luid = luid; tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, - // we still need to query GetLastError() to ensure that the privileges were actually obtained. + // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() + // succeeds, we still need to query GetLastError() to ensure that the privileges + // were actually obtained. + if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) && GetLastError() == ERROR_SUCCESS) @@ -189,9 +196,9 @@ void* aligned_large_pages_alloc(size_t allocSize) { void* aligned_large_pages_alloc(size_t allocSize) { #if defined(__linux__) - constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size + constexpr size_t alignment = 2 * 1024 * 1024; // 2MB page size assumed #else - constexpr size_t alignment = 4096; // assumed small page size + constexpr size_t alignment = 4096; // small page size assumed #endif // Round up to multiples of alignment @@ -206,7 +213,8 @@ void* aligned_large_pages_alloc(size_t allocSize) { #endif -// aligned_large_pages_free() will free the previously allocated ttmem +// aligned_large_pages_free() will free the memory previously allocated +// by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr.
#if defined(_WIN32) diff --git a/src/memory.h b/src/memory.h index ad7ca602..3155a5aa 100644 --- a/src/memory.h +++ b/src/memory.h @@ -33,13 +33,13 @@ namespace Stockfish { void* std_aligned_alloc(size_t alignment, size_t size); void std_aligned_free(void* ptr); -// memory aligned by page size, min alignment: 4096 bytes -void* aligned_large_pages_alloc(size_t size); -// nop if mem == nullptr -void aligned_large_pages_free(void* mem); -// frees memory which was placed there with placement new. -// works for both single objects and arrays of unknown bound +// Memory aligned by page size, min alignment: 4096 bytes +void* aligned_large_pages_alloc(size_t size); +void aligned_large_pages_free(void* mem); + +// Frees memory which was placed there with placement new. +// Works for both single objects and arrays of unknown bound. template void memory_deleter(T* ptr, FREE_FUNC free_func) { if (!ptr) @@ -53,15 +53,15 @@ void memory_deleter(T* ptr, FREE_FUNC free_func) { return; } -// frees memory which was placed there with placement new. -// works for both single objects and arrays of unknown bound +// Frees memory which was placed there with placement new. +// Works for both single objects and arrays of unknown bound. template void memory_deleter_array(T* ptr, FREE_FUNC free_func) { if (!ptr) return; - // Move back on the pointer to where the size is allocated. + // Move back on the pointer to where the size is allocated const size_t array_offset = std::max(sizeof(size_t), alignof(T)); char* raw_memory = reinterpret_cast(ptr) - array_offset; @@ -77,7 +77,7 @@ void memory_deleter_array(T* ptr, FREE_FUNC free_func) { free_func(raw_memory); } -// Allocates memory for a single object and places it there with placement new. +// Allocates memory for a single object and places it there with placement new template inline std::enable_if_t, T*> memory_allocator(ALLOC_FUNC alloc_func, Args&&... 
args) { @@ -86,7 +86,7 @@ inline std::enable_if_t, T*> memory_allocator(ALLOC_FUNC all return new (raw_memory) T(std::forward(args)...); } -// Allocates memory for an array of unknown bound and places it there with placement new. +// Allocates memory for an array of unknown bound and places it there with placement new template inline std::enable_if_t, std::remove_extent_t*> memory_allocator(ALLOC_FUNC alloc_func, size_t num) { @@ -94,7 +94,7 @@ memory_allocator(ALLOC_FUNC alloc_func, size_t num) { const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType)); - // save the array size in the memory location + // Save the array size in the memory location char* raw_memory = reinterpret_cast(alloc_func(array_offset + num * sizeof(ElementType))); ASSERT_ALIGNED(raw_memory, alignof(T)); @@ -104,7 +104,8 @@ memory_allocator(ALLOC_FUNC alloc_func, size_t num) { for (size_t i = 0; i < num; ++i) new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType(); - // Need to return the pointer at the start of the array so that the indexing in unique_ptr works + // Need to return the pointer at the start of the array so that + // the indexing in unique_ptr works. return reinterpret_cast(raw_memory + array_offset); } diff --git a/src/misc.cpp b/src/misc.cpp index b68c12b9..664ab4b8 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -112,14 +112,16 @@ class Logger { // Returns the full name of the current Stockfish version. -// For local dev compiles we try to append the commit sha and commit date -// from git if that fails only the local compilation date is set and "nogit" is specified: -// Stockfish dev-YYYYMMDD-SHA -// or -// Stockfish dev-YYYYMMDD-nogit +// +// For local dev compiles we try to append the commit SHA and +// commit date from git. 
If that fails only the local compilation +// date is set and "nogit" is specified: +// Stockfish dev-YYYYMMDD-SHA +// or +// Stockfish dev-YYYYMMDD-nogit // // For releases (non-dev builds) we only include the version number: -// Stockfish version +// Stockfish version std::string engine_info(bool to_uci) { std::stringstream ss; ss << "Stockfish " << version << std::setfill('0'); @@ -131,8 +133,9 @@ std::string engine_info(bool to_uci) { ss << stringify(GIT_DATE); #else constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); - std::string month, day, year; - std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" + + std::string month, day, year; + std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" date >> month >> day >> year; ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4) diff --git a/src/movepick.cpp b/src/movepick.cpp index f6f9f0dc..c21b14a9 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -59,8 +59,8 @@ enum Stages { QCHECK }; -// Sort moves in descending order up to and including -// a given limit. The order of moves smaller than the limit is left unspecified. +// Sort moves in descending order up to and including a given limit. +// The order of moves smaller than the limit is left unspecified. void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p) @@ -125,8 +125,8 @@ MovePicker::MovePicker(const Position& p, stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm)); } -// Constructor for ProbCut: we generate captures with SEE greater -// than or equal to the given threshold. +// Constructor for ProbCut: we generate captures with SEE greater than or equal +// to the given threshold. 
MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) : pos(p), captureHistory(cph), diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 483b84a8..ad0fb1b4 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -354,13 +354,18 @@ class FeatureTransformer { for (IndexType j = 0; j < NumOutputChunks; ++j) { - // What we want to do is multiply inputs in a pairwise manner (after clipping), and then shift right by 9. - // Instead, we shift left by 7, and use mulhi, stripping the bottom 16 bits, effectively shifting right by 16, - // resulting in a net shift of 9 bits. We use mulhi because it maintains the sign of the multiplication (unlike mullo), - // allowing us to make use of packus to clip 2 of the inputs, resulting in a save of 2 "vec_max_16" calls. - // A special case is when we use NEON, where we shift left by 6 instead, because the instruction "vqdmulhq_s16" - // also doubles the return value after the multiplication, adding an extra shift to the left by 1, so we - // compensate by shifting less before the multiplication. + // What we want to do is multiply inputs in a pairwise manner + // (after clipping), and then shift right by 9. Instead, we + // shift left by 7, and use mulhi, stripping the bottom 16 bits, + // effectively shifting right by 16, resulting in a net shift + // of 9 bits. We use mulhi because it maintains the sign of + // the multiplication (unlike mullo), allowing us to make use + // of packus to clip 2 of the inputs, resulting in a save of 2 + // "vec_max_16" calls. A special case is when we use NEON, + // where we shift left by 6 instead, because the instruction + // "vqdmulhq_s16" also doubles the return value after the + // multiplication, adding an extra shift to the left by 1, so + // we compensate by shifting less before the multiplication. 
#if defined(USE_SSE2) constexpr int shift = 7; @@ -426,10 +431,10 @@ class FeatureTransformer { } // NOTE: The parameter states_to_update is an array of position states. - // All states must be sequential, that is states_to_update[i] must either be reachable - // by repeatedly applying ->previous from states_to_update[i+1]. - // computed_st must be reachable by repeatedly applying ->previous on - // states_to_update[0]. + // All states must be sequential, that is states_to_update[i] must + // either be reachable by repeatedly applying ->previous from + // states_to_update[i+1], and computed_st must be reachable by + // repeatedly applying ->previous on states_to_update[0]. template void update_accumulator_incremental(const Position& pos, StateInfo* computed_st, @@ -446,7 +451,7 @@ class FeatureTransformer { #ifdef VECTOR // Gcc-10.2 unnecessarily spills AVX2 registers if this array - // is defined in the VECTOR code below, once in each branch + // is defined in the VECTOR code below, once in each branch. vec_t acc[NumRegs]; psqt_vec_t psqt[NumPsqtRegs]; #endif @@ -474,7 +479,8 @@ class FeatureTransformer { StateInfo* st = computed_st; - // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. + // Now update the accumulators listed in states_to_update[], + // where the last element is a sentinel. #ifdef VECTOR if (N == 1 && (removed[0].size() == 1 || removed[0].size() == 2) && added[0].size() == 1) @@ -794,7 +800,7 @@ class FeatureTransformer { } // The accumulator of the refresh entry has been updated. - // Now copy its content to the actual accumulator we were refreshing + // Now copy its content to the actual accumulator we were refreshing. std::memcpy(accumulator.accumulation[Perspective], entry.accumulation, sizeof(BiasType) * HalfDimensions); @@ -827,7 +833,7 @@ class FeatureTransformer { if ((oldest_st->*accPtr).computed[Perspective]) { - // Only update current position accumulator to minimize work. 
+ // Only update current position accumulator to minimize work StateInfo* states_to_update[1] = {pos.state()}; update_accumulator_incremental(pos, oldest_st, states_to_update); } @@ -846,8 +852,8 @@ class FeatureTransformer { if (next == nullptr) return; - // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. - // Currently we update 2 accumulators. + // Now update the accumulators listed in states_to_update[], where + // the last element is a sentinel. Currently we update two accumulators: // 1. for the current position // 2. the next accumulator after the computed one // The heuristic may change in the future. diff --git a/src/numa.h b/src/numa.h index fd9abd4d..3de8281d 100644 --- a/src/numa.h +++ b/src/numa.h @@ -37,8 +37,8 @@ #include "memory.h" -// We support linux very well, but we explicitly do NOT support Android, because there's -// no affected systems, not worth maintaining. +// We support linux very well, but we explicitly do NOT support Android, +// because there are no affected systems, not worth maintaining. #if defined(__linux__) && !defined(__ANDROID__) #if !defined(_GNU_SOURCE) #define _GNU_SOURCE @@ -81,9 +81,9 @@ using NumaIndex = size_t; inline CpuIndex get_hardware_concurrency() { CpuIndex concurrency = std::thread::hardware_concurrency(); - // Get all processors across all processor groups on windows, since ::hardware_concurrency - // only returns the number of processors in the first group, because only these - // are available to std::thread. + // Get all processors across all processor groups on windows, since + // hardware_concurrency() only returns the number of processors in + // the first group, because only these are available to std::thread.
#ifdef _WIN64 concurrency = std::max(concurrency, GetActiveProcessorCount(ALL_PROCESSOR_GROUPS)); #endif @@ -101,7 +101,7 @@ struct WindowsAffinity { // We also provide diagnostic for when the affinity is set to nullopt // whether it was due to being indeterminate. If affinity is indeterminate - // it's best to assume it is not set at all, so consistent with the meaning + // it is best to assume it is not set at all, so consistent with the meaning // of the nullopt affinity. bool isNewDeterminate = true; bool isOldDeterminate = true; @@ -119,23 +119,25 @@ struct WindowsAffinity { } // Since Windows 11 and Windows Server 2022 thread affinities can span - // processor groups and can be set as such by a new WinAPI function. - // However, we may need to force using the old API if we detect - // that the process has affinity set by the old API already and we want to override that. - // Due to the limitations of the old API we can't detect its use reliably. - // There will be cases where we detect not use but it has actually been used and vice versa. + // processor groups and can be set as such by a new WinAPI function. However, + // we may need to force using the old API if we detect that the process has + // affinity set by the old API already and we want to override that. Due to the + // limitations of the old API we cannot detect its use reliably. There will be + // cases where we detect not use but it has actually been used and vice versa. + bool likely_used_old_api() const { return oldApi.has_value() || !isOldDeterminate; } }; inline std::pair> get_process_group_affinity() { + // GetProcessGroupAffinity requires the GroupArray argument to be // aligned to 4 bytes instead of just 2. static constexpr size_t GroupArrayMinimumAlignment = 4; static_assert(GroupArrayMinimumAlignment >= alignof(USHORT)); // The function should succeed the second time, but it may fail if the group - // affinity has changed between GetProcessGroupAffinity calls. 
- // In such case we consider this a hard error, as we can't work with unstable affinities + // affinity has changed between GetProcessGroupAffinity calls. In such case + // we consider this a hard error, as we cannot work with unstable affinities // anyway. static constexpr int MAX_TRIES = 2; USHORT GroupCount = 1; @@ -165,10 +167,10 @@ inline std::pair> get_process_group_affinity() { } // On Windows there are two ways to set affinity, and therefore 2 ways to get it. -// These are not consistent, so we have to check both. -// In some cases it is actually not possible to determine affinity. -// For example when two different threads have affinity on different processor groups, -// set using SetThreadAffinityMask, we can't retrieve the actual affinities. +// These are not consistent, so we have to check both. In some cases it is actually +// not possible to determine affinity. For example when two different threads have +// affinity on different processor groups, set using SetThreadAffinityMask, we cannot +// retrieve the actual affinities. // From documentation on GetProcessAffinityMask: // > If the calling process contains threads in multiple groups, // > the function returns zero for both affinity masks. @@ -196,8 +198,8 @@ inline WindowsAffinity get_process_affinity() { } else if (RequiredMaskCount > 0) { - // If RequiredMaskCount then these affinities were never set, but it's not consistent - // so GetProcessAffinityMask may still return some affinity. + // If RequiredMaskCount then these affinities were never set, but it's + // not consistent so GetProcessAffinityMask may still return some affinity.
auto groupAffinities = std::make_unique(RequiredMaskCount); status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(), @@ -233,7 +235,7 @@ inline WindowsAffinity get_process_affinity() { DWORD_PTR proc, sys; status = GetProcessAffinityMask(GetCurrentProcess(), &proc, &sys); - // If proc == 0 then we can't determine affinity because it spans processor groups. + // If proc == 0 then we cannot determine affinity because it spans processor groups. // On Windows 11 and Server 2022 it will instead // > If, however, hHandle specifies a handle to the current process, the function // > always uses the calling thread's primary group (which by default is the same @@ -246,10 +248,12 @@ inline WindowsAffinity get_process_affinity() { return affinity; } - // If SetProcessAffinityMask was never called the affinity - // must span all processor groups, but if it was called it must only span one. + // If SetProcessAffinityMask was never called the affinity must span + // all processor groups, but if it was called it must only span one. + std::vector groupAffinity; // We need to capture this later and capturing // from structured bindings requires c++20. + std::tie(status, groupAffinity) = get_process_group_affinity(); if (status == 0) { @@ -282,11 +286,12 @@ inline WindowsAffinity get_process_affinity() { // If we got here it means that either SetProcessAffinityMask was never set // or we're on Windows 11/Server 2022. - // Since Windows 11 and Windows Server 2022 the behaviour of GetProcessAffinityMask changed - // > If, however, hHandle specifies a handle to the current process, the function - // > always uses the calling thread's primary group (which by default is the same - // > as the process' primary group) in order to set the - // > lpProcessAffinityMask and lpSystemAffinityMask. 
+ // Since Windows 11 and Windows Server 2022 the behaviour of + // GetProcessAffinityMask changed: + // > If, however, hHandle specifies a handle to the current process, + // > the function always uses the calling thread's primary group + // > (which by default is the same as the process' primary group) + // > in order to set the lpProcessAffinityMask and lpSystemAffinityMask. // In which case we can actually retrieve the full affinity. if (GetThreadSelectedCpuSetMasks_f != nullptr) @@ -300,9 +305,11 @@ inline WindowsAffinity get_process_affinity() { const int numActiveProcessors = GetActiveProcessorCount(static_cast(procGroupIndex)); - // We have to schedule to 2 different processors and & the affinities we get. - // Otherwise our processor choice could influence the resulting affinity. - // We assume the processor IDs within the group are filled sequentially from 0. + // We have to schedule to two different processors + // and & the affinities we get. Otherwise our processor + // choice could influence the resulting affinity. + // We assume the processor IDs within the group are + // filled sequentially from 0. uint64_t procCombined = std::numeric_limits::max(); uint64_t sysCombined = std::numeric_limits::max(); @@ -346,8 +353,9 @@ inline WindowsAffinity get_process_affinity() { } } - // We have to detect the case where the affinity was not set, or is set to all processors - // so that we correctly produce as std::nullopt result. + // We have to detect the case where the affinity was not set, + // or is set to all processors so that we correctly produce a + // std::nullopt result. if (!isAffinityFull) { affinity.oldApi = std::move(cpus); @@ -369,16 +377,16 @@ inline std::set get_process_affinity() { std::set cpus; - // For unsupported systems, or in case of a soft error, we may assume all processors - // are available for use. + // For unsupported systems, or in case of a soft error, we may assume + // all processors are available for use.
[[maybe_unused]] auto set_to_all_cpus = [&]() { for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) cpus.insert(c); }; // cpu_set_t by default holds 1024 entries. This may not be enough soon, - // but there is no easy way to determine how many threads there actually is. - // In this case we just choose a reasonable upper bound. + // but there is no easy way to determine how many threads there actually + // is. In this case we just choose a reasonable upper bound. static constexpr CpuIndex MaxNumCpus = 1024 * 64; cpu_set_t* mask = CPU_ALLOC(MaxNumCpus); @@ -437,19 +445,19 @@ class NumaReplicatedAccessToken { NumaIndex n; }; -// Designed as immutable, because there is no good reason to alter an already existing config -// in a way that doesn't require recreating it completely, and it would be complex and expensive -// to maintain class invariants. -// The CPU (processor) numbers always correspond to the actual numbering used by the system. -// The NUMA node numbers MAY NOT correspond to the system's numbering of the NUMA nodes. -// In particular, empty nodes may be removed, or the user may create custom nodes. -// It is guaranteed that NUMA nodes are NOT empty, i.e. every node exposed by NumaConfig -// has at least one processor assigned. +// Designed as immutable, because there is no good reason to alter an already +// existing config in a way that doesn't require recreating it completely, and +// it would be complex and expensive to maintain class invariants. +// The CPU (processor) numbers always correspond to the actual numbering used +// by the system. The NUMA node numbers MAY NOT correspond to the system's +// numbering of the NUMA nodes. In particular, empty nodes may be removed, or +// the user may create custom nodes. It is guaranteed that NUMA nodes are NOT +// empty: every node exposed by NumaConfig has at least one processor assigned. // // We use startup affinities so as not to modify its own behaviour in time. 
// -// Until Stockfish doesn't support exceptions all places where an exception should be thrown -// are replaced by std::exit. +// Since Stockfish doesn't support exceptions all places where an exception +// should be thrown are replaced by std::exit. class NumaConfig { public: NumaConfig() : @@ -460,9 +468,9 @@ class NumaConfig { } // This function queries the system for the mapping of processors to NUMA nodes. - // On Linux we read from standardized kernel sysfs, with a fallback to single NUMA node. - // On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see - // comment for Windows implementation of get_process_affinity + // On Linux we read from standardized kernel sysfs, with a fallback to single NUMA + // node. On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see + // comment for Windows implementation of get_process_affinity. static NumaConfig from_system([[maybe_unused]] bool respectProcessAffinity = true) { NumaConfig cfg = empty(); @@ -479,7 +487,6 @@ class NumaConfig { // On Linux things are straightforward, since there's no processor groups and // any thread can be scheduled on all processors. - // We try to gather this information from the sysfs first // https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node @@ -504,9 +511,9 @@ class NumaConfig { std::string path = std::string("/sys/devices/system/node/node") + std::to_string(n) + "/cpulist"; auto cpuIdsStr = read_file_to_string(path); - // Now, we only bail if the file does not exist. Some nodes may be empty, that's fine. - // An empty node still has a file that appears to have some whitespace, so we need - // to handle that. + // Now, we only bail if the file does not exist. Some nodes may be + // empty, that's fine. An empty node still has a file that appears + // to have some whitespace, so we need to handle that. 
if (!cpuIdsStr.has_value()) { fallback(); @@ -538,9 +545,10 @@ class NumaConfig { if (respectProcessAffinity) allowedCpus = STARTUP_PROCESSOR_AFFINITY.get_combined(); - // The affinity can't be determined in all cases on Windows, but we at least guarantee - // that the number of allowed processors is >= number of processors in the affinity mask. - // In case the user is not satisfied they must set the processor numbers explicitly. + // The affinity cannot be determined in all cases on Windows, + // but we at least guarantee that the number of allowed processors + // is >= number of processors in the affinity mask. In case the user + // is not satisfied they must set the processor numbers explicitly. auto is_cpu_allowed = [&allowedCpus](CpuIndex c) { return !allowedCpus.has_value() || allowedCpus->count(c) == 1; }; @@ -711,21 +719,22 @@ class NumaConfig { } bool suggests_binding_threads(CpuIndex numThreads) const { - // If we can reasonably determine that the threads can't be contained + // If we can reasonably determine that the threads cannot be contained // by the OS within the first NUMA node then we advise distributing // and binding threads. When the threads are not bound we can only use // NUMA memory replicated objects from the first node, so when the OS - // has to schedule on other nodes we lose performance. - // We also suggest binding if there's enough threads to distribute among nodes - // with minimal disparity. - // We try to ignore small nodes, in particular the empty ones. + // has to schedule on other nodes we lose performance. We also suggest + // binding if there's enough threads to distribute among nodes with minimal + // disparity. We try to ignore small nodes, in particular the empty ones. - // If the affinity set by the user does not match the affinity given by the OS - // then binding is necessary to ensure the threads are running on correct processors. 
+ // If the affinity set by the user does not match the affinity given by + // the OS then binding is necessary to ensure the threads are running on + // correct processors. if (customAffinity) return true; - // We obviously can't distribute a single thread, so a single thread should never be bound. + // We obviously cannot distribute a single thread, so a single thread + // should never be bound. if (numThreads <= 1) return false; @@ -754,8 +763,8 @@ class NumaConfig { if (nodes.size() == 1) { - // special case for when there's no NUMA nodes - // doesn't buy us much, but let's keep the default path simple + // Special case for when there's no NUMA nodes. This doesn't buy us + // much, but let's keep the default path simple. ns.resize(numThreads, NumaIndex{0}); } else @@ -769,9 +778,11 @@ class NumaConfig { { float fill = static_cast(occupation[n] + 1) / static_cast(nodes[n].size()); - // NOTE: Do we want to perhaps fill the first available node up to 50% first before considering other nodes? - // Probably not, because it would interfere with running multiple instances. We basically shouldn't - // favor any particular node. + // NOTE: Do we want to perhaps fill the first available node + // up to 50% first before considering other nodes? + // Probably not, because it would interfere with running + // multiple instances. We basically shouldn't favor any + // particular node. if (fill < bestNodeFill) { bestNode = n; @@ -816,18 +827,19 @@ class NumaConfig { #elif defined(_WIN64) - // Requires Windows 11. No good way to set thread affinity spanning processor groups before that. + // Requires Windows 11. No good way to set thread affinity spanning + // processor groups before that. 
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t( (void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks")); - // We ALWAYS set affinity with the new API if available, - // because there's no downsides, and we forcibly keep it consistent - // with the old API should we need to use it. I.e. we always keep this as a superset - // of what we set with SetThreadGroupAffinity. + // We ALWAYS set affinity with the new API if available, because + // there's no downsides, and we forcibly keep it consistent with + // the old API should we need to use it. I.e. we always keep this + // as a superset of what we set with SetThreadGroupAffinity. if (SetThreadSelectedCpuSetMasks_f != nullptr) { - // Only available on Windows 11 and Windows Server 2022 onwards. + // Only available on Windows 11 and Windows Server 2022 onwards const USHORT numProcGroups = USHORT( ((highestCpuIndex + 1) + WIN_PROCESSOR_GROUP_SIZE - 1) / WIN_PROCESSOR_GROUP_SIZE); auto groupAffinities = std::make_unique(numProcGroups); @@ -857,22 +869,25 @@ class NumaConfig { // Sometimes we need to force the old API, but do not use it unless necessary. if (SetThreadSelectedCpuSetMasks_f == nullptr || STARTUP_USE_OLD_AFFINITY_API) { - // On earlier windows version (since windows 7) we can't run a single thread + // On earlier windows version (since windows 7) we cannot run a single thread // on multiple processor groups, so we need to restrict the group. // We assume the group of the first processor listed for this node. // Processors from outside this group will not be assigned for this thread. // Normally this won't be an issue because windows used to assign NUMA nodes - // such that they can't span processor groups. However, since Windows 10 Build 20348 - // the behaviour changed, so there's a small window of versions between this and Windows 11 - // that might exhibit problems with not all processors being utilized. 
- // We handle this in NumaConfig::from_system by manually splitting the nodes when - // we detect that there's no function to set affinity spanning processor nodes. - // This is required because otherwise our thread distribution code may produce - // suboptimal results. + // such that they cannot span processor groups. However, since Windows 10 + // Build 20348 the behaviour changed, so there's a small window of versions + // between this and Windows 11 that might exhibit problems with not all + // processors being utilized. + // + // We handle this in NumaConfig::from_system by manually splitting the + // nodes when we detect that there is no function to set affinity spanning + // processor nodes. This is required because otherwise our thread distribution + // code may produce suboptimal results. + // // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support GROUP_AFFINITY affinity; std::memset(&affinity, 0, sizeof(GROUP_AFFINITY)); - // We use an ordered set so we're guaranteed to get the smallest cpu number here. + // We use an ordered set to be sure to get the smallest cpu number here. const size_t forcedProcGroupIndex = *(nodes[n].begin()) / WIN_PROCESSOR_GROUP_SIZE; affinity.Group = static_cast(forcedProcGroupIndex); for (CpuIndex c : nodes[n]) @@ -894,8 +909,8 @@ class NumaConfig { if (status == 0) std::exit(EXIT_FAILURE); - // We yield this thread just to be sure it gets rescheduled. - // This is defensive, allowed because this code is not performance critical. + // We yield this thread just to be sure it gets rescheduled. This is + // defensive, allowed because this code is not performance critical. SwitchToThread(); } @@ -1013,8 +1028,8 @@ class NumaConfig { class NumaReplicationContext; -// Instances of this class are tracked by the NumaReplicationContext instance -// NumaReplicationContext informs all tracked instances whenever NUMA configuration changes. 
+// Instances of this class are tracked by the NumaReplicationContext instance. +// NumaReplicationContext informs all tracked instances when NUMA configuration changes. class NumaReplicatedBase { public: NumaReplicatedBase(NumaReplicationContext& ctx); @@ -1034,9 +1049,9 @@ class NumaReplicatedBase { NumaReplicationContext* context; }; -// We force boxing with a unique_ptr. If this becomes an issue due to added indirection we -// may need to add an option for a custom boxing type. -// When the NUMA config changes the value stored at the index 0 is replicated to other nodes. +// We force boxing with a unique_ptr. If this becomes an issue due to added +// indirection we may need to add an option for a custom boxing type. When the +// NUMA config changes the value stored at the index 0 is replicated to other nodes. template class NumaReplicated: public NumaReplicatedBase { public: @@ -1090,8 +1105,8 @@ class NumaReplicated: public NumaReplicatedBase { } void on_numa_config_changed() override { - // Use the first one as the source. It doesn't matter which one we use, because they all must - // be identical, but the first one is guaranteed to exist. + // Use the first one as the source. It doesn't matter which one we use, + // because they all must be identical, but the first one is guaranteed to exist. auto source = std::move(instances[0]); replicate_from(std::move(*source)); } @@ -1167,7 +1182,7 @@ class NumaReplicationContext { private: NumaConfig config; - // std::set uses std::less by default, which is required for pointer comparison to be defined. + // std::set uses std::less by default, which is required for pointer comparison std::set trackedReplicatedObjects; }; diff --git a/src/position.h b/src/position.h index 3cfb87d0..064dd5fa 100644 --- a/src/position.h +++ b/src/position.h @@ -315,8 +315,8 @@ inline bool Position::capture(Move m) const { } // Returns true if a move is generated from the capture stage, having also -// queen promotions covered, i.e. 
consistency with the capture stage move generation -// is needed to avoid the generation of duplicate moves. +// queen promotions covered, i.e. consistency with the capture stage move +// generation is needed to avoid the generation of duplicate moves. inline bool Position::capture_stage(Move m) const { assert(m.is_ok()); return capture(m) || m.promotion_type() == QUEEN; diff --git a/src/search.cpp b/src/search.cpp index 47c5dc88..d3d95eda 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -78,7 +78,8 @@ constexpr int futility_move_count(bool improving, Depth depth) { return improving ? (3 + depth * depth) : (3 + depth * depth) / 2; } -// Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range +// Add correctionHistory value to raw staticEval and guarantee evaluation +// does not hit the tablebase range. Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; v += cv * std::abs(cv) / 5073; @@ -333,8 +334,8 @@ void Search::Worker::iterative_deepening() { int failedHighCnt = 0; while (true) { - // Adjust the effective depth searched, but ensure at least one effective increment - // for every four searchAgain steps (see issue #2717). + // Adjust the effective depth searched, but ensure at least one + // effective increment for every four searchAgain steps (see issue #2717). Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4); rootDelta = beta - alpha; @@ -354,15 +355,15 @@ void Search::Worker::iterative_deepening() { if (threads.stop) break; - // When failing high/low give some update before a re-search. - // To avoid excessive output that could hang GUIs like Fritz 19, only start + // When failing high/low give some update before a re-search. 
To avoid + // excessive output that could hang GUIs like Fritz 19, only start // at nodes > 10M (rather than depth N, which can be reached quickly) if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta) && nodes > 10000000) main_manager()->pv(*this, threads, tt, rootDepth); - // In case of failing low/high increase aspiration window and - // re-search, otherwise exit the loop. + // In case of failing low/high increase aspiration window and re-search, + // otherwise exit the loop. if (bestValue <= alpha) { beta = (alpha + beta) / 2; @@ -390,10 +391,11 @@ void Search::Worker::iterative_deepening() { if (mainThread && (threads.stop || pvIdx + 1 == multiPV || nodes > 10000000) - // A thread that aborted search can have mated-in/TB-loss PV and score - // that cannot be trusted, i.e. it can be delayed or refuted if we would have - // had time to fully search other root-moves. Thus we suppress this output and - // below pick a proven score/PV for this thread (from the previous iteration). + // A thread that aborted search can have mated-in/TB-loss PV and + // score that cannot be trusted, i.e. it can be delayed or refuted + // if we would have had time to fully search other root-moves. Thus + // we suppress this output and below pick a proven score/PV for this + // thread (from the previous iteration). && !(threads.abortedSearch && rootMoves[0].uciScore <= VALUE_TB_LOSS_IN_MAX_PLY)) main_manager()->pv(*this, threads, tt, rootDepth); @@ -504,6 +506,7 @@ void Search::Worker::iterative_deepening() { skill.best ? skill.best : skill.pick_best(rootMoves, multiPV))); } +// Reset histories, usually before a new game void Search::Worker::clear() { mainHistory.fill(0); captureHistory.fill(-700); @@ -523,7 +526,7 @@ void Search::Worker::clear() { } -// Main search function for both PV and non-PV nodes. 
+// Main search function for both PV and non-PV nodes template Value Search::Worker::search( Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { @@ -538,7 +541,7 @@ Value Search::Worker::search( // Limit the depth if extensions made it too large depth = std::min(depth, MAX_PLY - 1); - // Check if we have an upcoming move that draws by repetition. + // Check if we have an upcoming move that draws by repetition if (!rootNode && alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply)) { alpha = value_draw(this->nodes); @@ -611,7 +614,7 @@ Value Search::Worker::search( Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; ss->statScore = 0; - // Step 4. Transposition table lookup. + // Step 4. Transposition table lookup excludedMove = ss->excludedMove; posKey = pos.key(); auto [ttHit, ttData, ttWriter] = tt.probe(posKey); @@ -676,7 +679,7 @@ Value Search::Worker::search( Value tbValue = VALUE_TB - ss->ply; - // use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score + // Use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score value = wdl < -drawScore ? -tbValue : wdl > drawScore ? tbValue : VALUE_DRAW + 2 * wdl * drawScore; @@ -771,8 +774,8 @@ Value Search::Worker::search( opponentWorsening = ss->staticEval + (ss - 1)->staticEval > 2; // Step 7. Razoring (~1 Elo) - // If eval is really low check with qsearch if it can exceed alpha, if it can't, - // return a fail low. + // If eval is really low, check with qsearch if we can exceed alpha. If the + // search suggests we cannot exceed alpha, return a speculative fail low. if (eval < alpha - 494 - 290 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); @@ -836,27 +839,26 @@ Value Search::Worker::search( if (PvNode && !ttData.move) depth -= 3; - // Use qsearch if depth <= 0. 
+ // Use qsearch if depth <= 0 if (depth <= 0) return qsearch(pos, ss, alpha, beta); - // For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, or - // by 1 if there is a ttMove with an upper bound. + // For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, + // or by 1 if there is a ttMove with an upper bound. if (cutNode && depth >= 7 && (!ttData.move || ttData.bound == BOUND_UPPER)) depth -= 1 + !ttData.move; // Step 11. ProbCut (~10 Elo) - // If we have a good enough capture (or queen promotion) and a reduced search returns a value - // much above beta, we can (almost) safely prune the previous move. + // If we have a good enough capture (or queen promotion) and a reduced search + // returns a value much above beta, we can (almost) safely prune the previous move. probCutBeta = beta + 184 - 53 * improving; - if ( - !PvNode && depth > 3 - && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY - // If value from transposition table is lower than probCutBeta, don't attempt probCut - // there and in further interactions with transposition table cutoff depth is set to depth - 3 - // because probCut search has depth set to depth - 4 but we also do a move before it - // So effective depth is equal to depth - 3 - && !(ttData.depth >= depth - 3 && ttData.value != VALUE_NONE && ttData.value < probCutBeta)) + if (!PvNode && depth > 3 + && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY + // If value from transposition table is lower than probCutBeta, don't attempt + // probCut there and in further interactions with transposition table cutoff + // depth is set to depth - 3 because probCut search has depth set to depth - 4 + // but we also do a move before it. So effective depth is equal to depth - 3. 
+ && !(ttData.depth >= depth - 3 && ttData.value != VALUE_NONE && ttData.value < probCutBeta)) { assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta); @@ -870,7 +872,6 @@ Value Search::Worker::search( if (move == excludedMove) continue; - // Check for legality if (!pos.legal(move)) continue; @@ -1050,18 +1051,18 @@ moves_loop: // When in check, search starts here // We take care to not overdo to avoid search getting stuck. if (ss->ply < thisThread->rootDepth * 2) { - // Singular extension search (~76 Elo, ~170 nElo). If all moves but one fail - // low on a search of (alpha-s, beta-s), and just one fails high on (alpha, beta), - // then that move is singular and should be extended. To verify this we do - // a reduced search on the position excluding the ttMove and if the result - // is lower than ttValue minus a margin, then we will extend the ttMove. - // Recursive singular search is avoided. + // Singular extension search (~76 Elo, ~170 nElo). If all moves but one + // fail low on a search of (alpha-s, beta-s), and just one fails high on + // (alpha, beta), then that move is singular and should be extended. To + // verify this we do a reduced search on the position excluding the ttMove + // and if the result is lower than ttValue minus a margin, then we will + // extend the ttMove. Recursive singular search is avoided. - // Note: the depth margin and singularBeta margin are known for having non-linear - // scaling. Their values are optimized to time controls of 180+1.8 and longer - // so changing them requires tests at these types of time controls. - // Generally, higher singularBeta (i.e closer to ttValue) and lower extension - // margins scale well. + // Note: the depth margin and singularBeta margin are known for having + // non-linear scaling. Their values are optimized to time controls of + // 180+1.8 and longer so changing them requires tests at these types of + // time controls. 
Generally, higher singularBeta (i.e closer to ttValue) + // and lower extension margins scale well. if (!rootNode && move == ttData.move && !excludedMove && depth >= 4 - (thisThread->completedDepth > 36) + ss->ttPv @@ -1089,28 +1090,31 @@ moves_loop: // When in check, search starts here // Multi-cut pruning // Our ttMove is assumed to fail high based on the bound of the TT entry, - // and if after excluding the ttMove with a reduced search we fail high over the original beta, - // we assume this expected cut-node is not singular (multiple moves fail high), - // and we can prune the whole subtree by returning a softbound. + // and if after excluding the ttMove with a reduced search we fail high + // over the original beta, we assume this expected cut-node is not + // singular (multiple moves fail high), and we can prune the whole + // subtree by returning a softbound. else if (value >= beta && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) return value; // Negative extensions - // If other moves failed high over (ttValue - margin) without the ttMove on a reduced search, - // but we cannot do multi-cut because (ttValue - margin) is lower than the original beta, - // we do not know if the ttMove is singular or can do a multi-cut, - // so we reduce the ttMove in favor of other moves based on some conditions: + // If other moves failed high over (ttValue - margin) without the + // ttMove on a reduced search, but we cannot do multi-cut because + // (ttValue - margin) is lower than the original beta, we do not know + // if the ttMove is singular or can do a multi-cut, so we reduce the + // ttMove in favor of other moves based on some conditions: // If the ttMove is assumed to fail high over current beta (~7 Elo) else if (ttData.value >= beta) extension = -3; - // If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo) + // If we are on a cutNode but the ttMove is not assumed to fail high + // over current beta (~1 Elo) else if (cutNode) 
extension = -2; } - // Extension for capturing the previous moved piece (~0 Elo on STC, ~1 Elo on LTC) + // Extension for capturing the previous moved piece (~1 Elo at LTC) else if (PvNode && move.to_sq() == prevSq && thisThread->captureHistory[movedPiece][move.to_sq()] [type_of(pos.piece_on(move.to_sq()))] @@ -1136,9 +1140,9 @@ moves_loop: // When in check, search starts here pos.do_move(move, st, givesCheck); // These reduction adjustments have proven non-linear scaling. - // They are optimized to time controls of 180 + 1.8 and longer so - // changing them or adding conditions that are similar - // requires tests at these types of time controls. + // They are optimized to time controls of 180 + 1.8 and longer, + // so changing them or adding conditions that are similar requires + // tests at these types of time controls. // Decrease reduction if position is or has been on the PV (~7 Elo) if (ss->ttPv) @@ -1148,7 +1152,7 @@ moves_loop: // When in check, search starts here if (PvNode) r--; - // These reduction adjustments have no proven non-linear scaling. + // These reduction adjustments have no proven non-linear scaling // Increase reduction for cut nodes (~4 Elo) if (cutNode) @@ -1163,8 +1167,8 @@ moves_loop: // When in check, search starts here if ((ss + 1)->cutoffCnt > 3) r += 1 + !(PvNode || cutNode); - // For first picked move (ttMove) reduce reduction - // but never allow it to go below 0 (~3 Elo) + // For first picked move (ttMove) reduce reduction, but never allow + // reduction to go below 0 (~3 Elo) else if (move == ttData.move) r = std::max(0, r - 2); @@ -1190,8 +1194,8 @@ moves_loop: // When in check, search starts here // Do a full-depth search when reduced LMR search fails high if (value > alpha && d < newDepth) { - // Adjust full-depth search based on LMR results - if the result - // was good enough search deeper, if it was bad enough search shallower. 
+ // Adjust full-depth search based on LMR results - if the result was + // good enough search deeper, if it was bad enough search shallower. const bool doDeeperSearch = value > (bestValue + 35 + 2 * newDepth); // (~1 Elo) const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo) @@ -1237,8 +1241,8 @@ moves_loop: // When in check, search starts here // Step 20. Check for a new best move // Finished searching the move. If a stop occurred, the return value of - // the search cannot be trusted, and we return immediately without - // updating best move, PV and TT. + // the search cannot be trusted, and we return immediately without updating + // best move, principal variation nor transposition table. if (threads.stop.load(std::memory_order_relaxed)) return VALUE_ZERO; @@ -1351,7 +1355,8 @@ moves_loop: // When in check, search starts here if (!moveCount) bestValue = excludedMove ? alpha : ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW; - // If there is a move that produces search value greater than alpha we update the stats of searched moves + // If there is a move that produces search value greater than alpha, + // we update the stats of searched moves. else if (bestMove) update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, quietCount, capturesSearched, captureCount, depth); @@ -1385,8 +1390,8 @@ moves_loop: // When in check, search starts here if (bestValue <= alpha) ss->ttPv = ss->ttPv || ((ss - 1)->ttPv && depth > 3); - // Write gathered information in transposition table - // Static evaluation is saved as it was before correction history + // Write gathered information in transposition table. Note that the + // static evaluation is saved as it was before correction history. if (!excludedMove && !(rootNode && thisThread->pvIdx)) ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, bestValue >= beta ? 
BOUND_LOWER @@ -1410,12 +1415,12 @@ moves_loop: // When in check, search starts here } -// Quiescence search function, which is called by the main search function with zero depth, or -// recursively with further decreasing depth per call. With depth <= 0, we "should" be using -// static eval only, but tactical moves may confuse the static eval. To fight this horizon effect, -// we implement this qsearch of tactical moves only. -// See https://www.chessprogramming.org/Horizon_Effect and https://www.chessprogramming.org/Quiescence_Search -// (~155 Elo) +// Quiescence search function, which is called by the main search function with +// depth zero, or recursively with further decreasing depth. With depth <= 0, we +// "should" be using static eval only, but tactical moves may confuse the static eval. +// To fight this horizon effect, we implement this qsearch of tactical moves (~155 Elo). +// See https://www.chessprogramming.org/Horizon_Effect +// and https://www.chessprogramming.org/Quiescence_Search template Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { @@ -1426,7 +1431,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, assert(PvNode || (alpha == beta - 1)); assert(depth <= 0); - // Check if we have an upcoming move that draws by repetition. (~1 Elo) + // Check if we have an upcoming move that draws by repetition (~1 Elo) if (alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply)) { alpha = value_draw(this->nodes); @@ -1469,9 +1474,10 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, assert(0 <= ss->ply && ss->ply < MAX_PLY); - // Note that unlike regular search, which stores the literal depth into the TT, from QS we - // only store the current movegen stage as "depth". If in check, we search all evasions and - // thus store DEPTH_QS_CHECKS. (Evasions may be quiet, and _CHECKS includes quiets.) 
+ // Note that unlike regular search, which stores the literal depth into the + // transposition table, from qsearch we only store the current movegen stage + // as "depth". If in check, we search all evasions and thus store DEPTH_QS_CHECKS. + // Evasions may be quiet, and _CHECKS includes quiets. Depth qsTtDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NORMAL; // Step 3. Transposition table lookup @@ -1512,7 +1518,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, } else { - // In case of null move search, use previous static eval with a different sign + // In case of null move search, use previous static eval with opposite sign unadjustedStaticEval = (ss - 1)->currentMove != Move::null() ? evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]) @@ -1542,21 +1548,20 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, (ss - 2)->continuationHistory}; - // Initialize a MovePicker object for the current position, and prepare to search the moves. - // We presently use two stages of qs movegen, first captures+checks, then captures only. - // (When in check, we simply search all evasions.) - // (Presently, having the checks stage is worth only 1 Elo, and may be removable in the near future, - // which would result in only a single stage of QS movegen.) + // Initialize a MovePicker object for the current position, and prepare to search + // the moves. We presently use two stages of move generator in quiescence search: + // first captures+checks, then captures only (but when in check, we simply search + // all evasions). Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, &thisThread->pawnHistory); - // Step 5. 
Loop through all pseudo-legal moves until no moves remain or a beta cutoff occurs. + // Step 5. Loop through all pseudo-legal moves until no moves remain or a beta + // cutoff occurs. while ((move = mp.next_move()) != Move::none()) { assert(move.is_ok()); - // Check for legality if (!pos.legal(move)) continue; @@ -1577,24 +1582,24 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Value futilityValue = futilityBase + PieceValue[pos.piece_on(move.to_sq())]; - // If static eval + value of piece we are going to capture is much lower - // than alpha we can prune this move. (~2 Elo) + // If static eval + value of piece we are going to capture is + // much lower than alpha, we can prune this move. (~2 Elo) if (futilityValue <= alpha) { bestValue = std::max(bestValue, futilityValue); continue; } - // If static eval is much lower than alpha and move is not winning material - // we can prune this move. (~2 Elo) + // If static eval is much lower than alpha and move is + // not winning material, we can prune this move. (~2 Elo) if (futilityBase <= alpha && !pos.see_ge(move, 1)) { bestValue = std::max(bestValue, futilityBase); continue; } - // If static exchange evaluation is much worse than what is needed to not - // fall below alpha we can prune this move. + // If static exchange evaluation is much worse than what + // is needed to not fall below alpha, we can prune this move. if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 4)) { bestValue = alpha; @@ -1654,8 +1659,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, } // Step 9. Check for mate - // All legal moves have been searched. A special case: if we're in check - // and no legal moves were found, it is checkmate. + // All legal moves have been searched. A special case: if we are + // in check and no legal moves were found, it is checkmate. 
if (ss->inCheck && bestValue == -VALUE_INFINITE) { assert(!MoveList(pos).size()); @@ -1665,8 +1670,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && bestValue >= beta) bestValue = (3 * bestValue + beta) / 4; - // Save gathered info in transposition table - // Static evaluation is saved as it was before adjustment by correction history + // Save gathered info in transposition table. The static evaluation + // is saved as it was before adjustment by correction history. ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), pvHit, bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, qsTtDepth, bestMove, unadjustedStaticEval, tt.generation()); @@ -1697,8 +1702,8 @@ TimePoint Search::Worker::elapsed_time() const { return main_manager()->tm.elaps namespace { -// Adjusts a mate or TB score from "plies to mate from the root" -// to "plies to mate from the current position". Standard scores are unchanged. +// Adjusts a mate or TB score from "plies to mate from the root" to +// "plies to mate from the current position". Standard scores are unchanged. // The function is called before storing a value in the transposition table. Value value_to_tt(Value v, int ply) { @@ -1707,11 +1712,11 @@ Value value_to_tt(Value v, int ply) { } -// Inverse of value_to_tt(): it adjusts a mate or TB score -// from the transposition table (which refers to the plies to mate/be mated from -// current position) to "plies to mate/be mated (TB win/loss) from the root". -// However, to avoid potentially false mate or TB scores related to the 50 moves rule -// and the graph history interaction, we return the highest non-TB score instead. +// Inverse of value_to_tt(): it adjusts a mate or TB score from the transposition +// table (which refers to the plies to mate/be mated from current position) to +// "plies to mate/be mated (TB win/loss) from the root". 
However, to avoid +// potentially false mate or TB scores related to the 50 moves rule and the +// graph history interaction, we return the highest non-TB score instead. Value value_from_tt(Value v, int ply, int r50c) { if (v == VALUE_NONE) @@ -1810,8 +1815,8 @@ void update_all_stats(const Position& pos, } -// Updates histories of the move pairs formed -// by moves at ply -1, -2, -3, -4, and -6 with current move. +// Updates histories of the move pairs formed by moves +// at ply -1, -2, -3, -4, and -6 with current move. void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { bonus = bonus * 52 / 64; @@ -1859,8 +1864,8 @@ void update_quiet_stats( } -// When playing with strength handicap, choose the best move among a set of RootMoves -// using a statistical rule dependent on 'level'. Idea by Heinz van Saanen. +// When playing with strength handicap, choose the best move among a set of +// RootMoves using a statistical rule dependent on 'level'. Idea by Heinz van Saanen. Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) { static PRNG rng(now()); // PRNG sequence should be non-deterministic @@ -1891,8 +1896,8 @@ Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) { } -// Used to print debug info and, more importantly, -// to detect when we are out of available time and thus stop the search. +// Used to print debug info and, more importantly, to detect +// when we are out of available time and thus stop the search. void SearchManager::check_time(Search::Worker& worker) { if (--callsCnt > 0) return; @@ -1926,8 +1931,9 @@ void SearchManager::check_time(Search::Worker& worker) { } // Used to correct and extend PVs for moves that have a TB (but not a mate) score. -// Keeps the search based PV for as long as it is verified to maintain the game outcome, truncates afterwards. -// Finally, extends to mate the PV, providing a possible continuation (but not a proven mating line). 
+// Keeps the search based PV for as long as it is verified to maintain the game +// outcome, truncates afterwards. Finally, extends to mate the PV, providing a +// possible continuation (but not a proven mating line). void syzygy_extend_pv(const OptionsMap& options, const Search::LimitsType& limits, Position& pos, @@ -1937,7 +1943,7 @@ void syzygy_extend_pv(const OptionsMap& options, auto t_start = std::chrono::steady_clock::now(); int moveOverhead = int(options["Move Overhead"]); - // Do not use more than moveOverhead / 2 time, if time management is active. + // Do not use more than moveOverhead / 2 time, if time management is active auto time_abort = [&t_start, &moveOverhead, &limits]() -> bool { auto t_end = std::chrono::steady_clock::now(); return limits.use_time_management() @@ -1968,7 +1974,7 @@ void syzygy_extend_pv(const OptionsMap& options, auto& st = sts.emplace_back(); pos.do_move(pvMove, st); - // don't allow for repetitions or drawing moves along the PV in TB regime. + // Do not allow for repetitions or drawing moves along the PV in TB regime if (config.rootInTB && pos.is_draw(ply)) { pos.undo_move(pvMove); @@ -1976,17 +1982,18 @@ void syzygy_extend_pv(const OptionsMap& options, break; } - // Full PV shown will thus be validated and end TB. - // If we can't validate the full PV in time, we don't show it. + // Full PV shown will thus be validated and end in TB. + // If we cannot validate the full PV in time, we do not show it. if (config.rootInTB && time_abort()) break; } - // resize the PV to the correct part + // Resize the PV to the correct part rootMove.pv.resize(ply); - // Step 2, now extend the PV to mate, as if the user explores syzygy-tables.info using - // top ranked moves (minimal DTZ), which gives optimal mates only for simple endgames e.g. KRvK + // Step 2, now extend the PV to mate, as if the user explored syzygy-tables.info + // using top ranked moves (minimal DTZ), which gives optimal mates only for simple + // endgames e.g. KRvK. 
while (!pos.is_draw(0)) { if (time_abort()) @@ -1998,8 +2005,8 @@ void syzygy_extend_pv(const OptionsMap& options, auto& rm = legalMoves.emplace_back(m); StateInfo tmpSI; pos.do_move(m, tmpSI); - // Give a score of each move to break DTZ ties - // restricting opponent mobility, but not giving the opponent a capture. + // Give a score of each move to break DTZ ties restricting opponent mobility, + // but not giving the opponent a capture. for (const auto& mOpp : MoveList(pos)) rm.tbRank -= pos.capture(mOpp) ? 100 : 1; pos.undo_move(m); @@ -2009,16 +2016,16 @@ void syzygy_extend_pv(const OptionsMap& options, if (legalMoves.size() == 0) break; - // sort moves according to their above assigned rank, + // Sort moves according to their above assigned rank. // This will break ties for moves with equal DTZ in rank_root_moves. std::stable_sort( legalMoves.begin(), legalMoves.end(), [](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; }); - // The winning side tries to minimize DTZ, the losing side maximizes it. + // The winning side tries to minimize DTZ, the losing side maximizes it Tablebases::Config config = Tablebases::rank_root_moves(options, pos, legalMoves, true); - // If DTZ is not available we might not find a mate, so we bail out. + // If DTZ is not available we might not find a mate, so we bail out if (!config.rootInTB || config.cardinality > 0) break; @@ -2030,23 +2037,24 @@ void syzygy_extend_pv(const OptionsMap& options, pos.do_move(pvMove, st); } - // Finding a draw in this function is an exceptional case, that cannot happen during engine game play, - // since we have a winning score, and play correctly with TB support. - // However, it can be that a position is draw due to the 50 move rule if it has been been reached - // on the board with a non-optimal 50 move counter e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106 - // which TB with dtz counter rounding cannot always correctly rank. 
See also + // Finding a draw in this function is an exceptional case, that cannot happen + // during engine game play, since we have a winning score, and play correctly + // with TB support. However, it can be that a position is draw due to the 50 move + // rule if it has been been reached on the board with a non-optimal 50 move counter + // (e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106 ) which TB with dtz counter rounding + // cannot always correctly rank. See also // https://github.com/official-stockfish/Stockfish/issues/5175#issuecomment-2058893495 - // We adjust the score to match the found PV. Note that a TB loss score can be displayed - // if the engine did not find a drawing move yet, but eventually search will figure it out. - // E.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1 + // We adjust the score to match the found PV. Note that a TB loss score can be + // displayed if the engine did not find a drawing move yet, but eventually search + // will figure it out (e.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1 ) if (pos.is_draw(0)) v = VALUE_DRAW; - // Undo the PV moves. + // Undo the PV moves for (auto it = rootMove.pv.rbegin(); it != rootMove.pv.rend(); ++it) pos.undo_move(*it); - // Inform if we couldn't get a full extension in time. + // Inform if we couldn't get a full extension in time if (time_abort()) sync_cout << "info string Syzygy based PV extension requires more time, increase Move Overhead as needed." 
@@ -2092,7 +2100,7 @@ void SearchManager::pv(Search::Worker& worker, for (Move m : rootMoves[i].pv) pv += UCIEngine::move(m, pos.is_chess960()) + " "; - // remove last whitespace + // Remove last whitespace if (!pv.empty()) pv.pop_back(); diff --git a/src/search.h b/src/search.h index 122cd549..57596754 100644 --- a/src/search.h +++ b/src/search.h @@ -236,8 +236,8 @@ class Worker { public: Worker(SharedState&, std::unique_ptr, size_t, NumaReplicatedAccessToken); - // Called at instantiation to initialize Reductions tables - // Reset histories, usually before a new game + // Called at instantiation to initialize reductions tables. + // Reset histories, usually before a new game. void clear(); // Called when the program receives the UCI 'go' command. @@ -256,7 +256,7 @@ class Worker { private: void iterative_deepening(); - // Main search function for both PV and non-PV nodes + // This is the main search function, for both PV and non-PV nodes template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); @@ -266,8 +266,7 @@ class Worker { Depth reduction(bool i, Depth d, int mn, int delta) const; - // Get a pointer to the search manager, only allowed to be called by the - // main thread. + // Pointer to the search manager, only allowed to be called by the main thread SearchManager* main_manager() const { assert(threadIdx == 0); return static_cast(manager.get()); diff --git a/src/thread.cpp b/src/thread.cpp index 4acb9854..f17fc4a5 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -50,8 +50,8 @@ Thread::Thread(Search::SharedState& sharedState, run_custom_job([this, &binder, &sharedState, &sm, n]() { // Use the binder to [maybe] bind the threads to a NUMA node before doing - // the Worker allocation. - // Ideally we would also allocate the SearchManager here, but that's minor. + // the Worker allocation. Ideally we would also allocate the SearchManager + // here, but that's minor. 
this->numaAccessToken = binder(); this->worker = std::make_unique(sharedState, std::move(sm), n, this->numaAccessToken); @@ -72,27 +72,26 @@ Thread::~Thread() { stdThread.join(); } - // Wakes up the thread that will start the search void Thread::start_searching() { assert(worker != nullptr); run_custom_job([this]() { worker->start_searching(); }); } -// Wakes up the thread that will start the search +// Clears the histories for the thread worker (usually before a new game) void Thread::clear_worker() { assert(worker != nullptr); run_custom_job([this]() { worker->clear(); }); } -// Blocks on the condition variable -// until the thread has finished searching. +// Blocks on the condition variable until the thread has finished searching void Thread::wait_for_search_finished() { std::unique_lock lk(mutex); cv.wait(lk, [&] { return !searching; }); } +// Launching a function in the thread void Thread::run_custom_job(std::function f) { { std::unique_lock lk(mutex); @@ -103,8 +102,8 @@ void Thread::run_custom_job(std::function f) { cv.notify_one(); } -// Thread gets parked here, blocked on the -// condition variable, when it has no work to do. +// Thread gets parked here, blocked on the condition variable +// when the thread has no work to do. void Thread::idle_loop() { while (true) @@ -233,8 +232,9 @@ void ThreadPool::wait_on_thread(size_t threadId) { size_t ThreadPool::num_threads() const { return threads.size(); } -// Wakes up main thread waiting in idle_loop() and -// returns immediately. Main thread will wake up other threads and start the search. + +// Wakes up main thread waiting in idle_loop() and returns immediately. +// Main thread will wake up other threads and start the search. void ThreadPool::start_thinking(const OptionsMap& options, Position& pos, StateListPtr& states, @@ -274,8 +274,8 @@ void ThreadPool::start_thinking(const OptionsMap& options, // We use Position::set() to set root position across threads. 
But there are // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot // be deduced from a fen string, so set() clears them and they are set from - // setupStates->back() later. The rootState is per thread, earlier states are shared - // since they are read-only. + // setupStates->back() later. The rootState is per thread, earlier states are + // shared since they are read-only. for (auto&& th : threads) { th->run_custom_job([&]() { @@ -335,7 +335,7 @@ Thread* ThreadPool::get_best_thread() const { const bool newThreadInProvenLoss = newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY; - // Note that we make sure not to pick a thread with truncated-PV for better viewer experience. + // We make sure not to pick a thread with truncated principal variation const bool betterVotingValue = thread_voting_value(th.get()) * int(newThreadPV.size() > 2) > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2); @@ -363,8 +363,8 @@ Thread* ThreadPool::get_best_thread() const { } -// Start non-main threads -// Will be invoked by main thread after it has started searching +// Start non-main threads. +// Will be invoked by main thread after it has started searching. void ThreadPool::start_searching() { for (auto&& th : threads) @@ -374,7 +374,6 @@ void ThreadPool::start_searching() { // Wait for non-main threads - void ThreadPool::wait_for_search_finished() const { for (auto&& th : threads) diff --git a/src/types.h b/src/types.h index 10ad1fac..8a9400bb 100644 --- a/src/types.h +++ b/src/types.h @@ -137,9 +137,9 @@ enum Bound { BOUND_EXACT = BOUND_UPPER | BOUND_LOWER }; -// Value is used as an alias for int16_t, this is done to differentiate between -// a search value and any other integer value. The values used in search are always -// supposed to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range. 
+// Value is used as an alias for int, this is done to differentiate between a search +// value and any other integer value. The values used in search are always supposed +// to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range. using Value = int; constexpr Value VALUE_ZERO = 0; @@ -187,15 +187,20 @@ constexpr Value PieceValue[PIECE_NB] = { using Depth = int; enum : int { - // The following DEPTH_ constants are used for TT entries and QS movegen stages. In regular search, - // TT depth is literal: the search depth (effort) used to make the corresponding TT value. - // In qsearch, however, TT entries only store the current QS movegen stage (which should thus compare + // The following DEPTH_ constants are used for transposition table entries + // and quiescence search move generation stages. In regular search, the + // depth stored in the transposition table is literal: the search depth + // (effort) used to make the corresponding transposition table value. In + // quiescence search, however, the transposition table entries only store + // the current quiescence move generation stage (which should thus compare // lower than any regular search depth). DEPTH_QS_CHECKS = 0, DEPTH_QS_NORMAL = -1, - // For TT entries where no searching at all was done (whether regular or qsearch) we use - // _UNSEARCHED, which should thus compare lower than any QS or regular depth. _ENTRY_OFFSET is used - // only for the TT entry occupancy check (see tt.cpp), and should thus be lower than _UNSEARCHED. + // For transposition table entries where no searching at all was done + // (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus + // compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET + // is used only for the transposition table entry occupancy check (see tt.cpp), + // and should thus be lower than DEPTH_UNSEARCHED. 
DEPTH_UNSEARCHED = -2, DEPTH_ENTRY_OFFSET = -3 }; @@ -356,9 +361,10 @@ enum MoveType { // bit 14-15: special move flag: promotion (1), en passant (2), castling (3) // NOTE: en passant bit is set only when a pawn can be captured // -// Special cases are Move::none() and Move::null(). We can sneak these in because in -// any normal move destination square is always different from origin square -// while Move::none() and Move::null() have the same origin and destination square. +// Special cases are Move::none() and Move::null(). We can sneak these in because +// in any normal move the destination square and origin square are always different, +// but Move::none() and Move::null() have the same origin and destination square. + class Move { public: Move() = default; From 6135a0e2f830a587d2ac7a332bb62188fa924aad Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 10 Jul 2024 21:13:59 +0200 Subject: [PATCH 182/315] Provide more info on found TB files now uses the following format: `info string Found 510 WDL and 510 DTZ tablebase files (up to 6-man).` this clarifies exactly what has been found, as the difference matters, e.g. for the PV extension of TB scores. 
closes https://github.com/official-stockfish/Stockfish/pull/5471 No functional change --- src/syzygy/tbprobe.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index fc2a092a..e2344fda 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -443,6 +443,8 @@ class TBTables { std::deque> wdlTable; std::deque> dtzTable; + size_t foundDTZFiles = 0; + size_t foundWDLFiles = 0; void insert(Key key, TBTable* wdl, TBTable* dtz) { uint32_t homeBucket = uint32_t(key) & (Size - 1); @@ -486,9 +488,16 @@ class TBTables { memset(hashTable, 0, sizeof(hashTable)); wdlTable.clear(); dtzTable.clear(); + foundDTZFiles = 0; + foundWDLFiles = 0; } - size_t size() const { return wdlTable.size(); } - void add(const std::vector& pieces); + + void info() const { + sync_cout << "info string Found " << foundWDLFiles << " WDL and " << foundDTZFiles + << " DTZ tablebase files (up to " << MaxCardinality << "-man)." << sync_endl; + } + + void add(const std::vector& pieces); }; TBTables TBTables; @@ -501,13 +510,22 @@ void TBTables::add(const std::vector& pieces) { for (PieceType pt : pieces) code += PieceToChar[pt]; + code.insert(code.find('K', 1), "v"); - TBFile file(code.insert(code.find('K', 1), "v") + ".rtbw"); // KRK -> KRvK + TBFile file_dtz(code + ".rtbz"); // KRK -> KRvK + if (file_dtz.is_open()) + { + file_dtz.close(); + foundDTZFiles++; + } + + TBFile file(code + ".rtbw"); // KRK -> KRvK if (!file.is_open()) // Only WDL file is checked return; file.close(); + foundWDLFiles++; MaxCardinality = std::max(int(pieces.size()), MaxCardinality); @@ -1466,7 +1484,7 @@ void Tablebases::init(const std::string& paths) { } } - sync_cout << "info string Found " << TBTables.size() << " tablebases" << sync_endl; + TBTables.info(); } // Probe the WDL table for a particular position. 
From 8d1e41458e1fd12aaf42a13fcc0676ae487531f0 Mon Sep 17 00:00:00 2001 From: yl25946 Date: Wed, 10 Jul 2024 23:49:16 -0500 Subject: [PATCH 183/315] removed second killer move STC with movepicker rewrite: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 46656 W: 12208 L: 11995 D: 22453 Ptnml(0-2): 203, 5461, 11777, 5694, 193 https://tests.stockfishchess.org/tests/view/668d98a15034141ae5999e68 Earlier version passed STC: LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 468896 W: 120999 L: 120054 D: 227843 Ptnml(0-2): 1207, 55209, 120639, 56218, 1175 https://tests.stockfishchess.org/tests/view/668b17d2cf91c430fca58630 Earlier version passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 550524 W: 139553 L: 139877 D: 271094 Ptnml(0-2): 333, 61646, 151616, 61346, 321 https://tests.stockfishchess.org/tests/view/668b2e04cf91c430fca586b1 closes https://github.com/official-stockfish/Stockfish/pull/5472 bench 1234309 Co-authored-by: rn5f107s2 --- src/movepick.cpp | 28 ++++++++++++---------------- src/movepick.h | 4 ++-- src/search.cpp | 24 ++++++++++-------------- src/search.h | 2 +- 4 files changed, 25 insertions(+), 33 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index c21b14a9..d54bcbc7 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include "bitboard.h" @@ -35,7 +34,7 @@ enum Stages { MAIN_TT, CAPTURE_INIT, GOOD_CAPTURE, - REFUTATION, + KILLER, QUIET_INIT, GOOD_QUIET, BAD_CAPTURE, @@ -91,14 +90,14 @@ MovePicker::MovePicker(const Position& p, const CapturePieceToHistory* cph, const PieceToHistory** ch, const PawnHistory* ph, - const Move* killers) : + Move km) : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), pawnHistory(ph), ttMove(ttm), - refutations{{killers[0], 0}, {killers[1], 0}}, + killer{km, 0}, depth(d) { assert(d > 0); @@ -268,19 +267,17 @@ top: })) return *(cur - 1); - // Prepare the pointers to loop over the refutations array - cur = std::begin(refutations); - endMoves = 
std::end(refutations); - ++stage; [[fallthrough]]; - case REFUTATION : - if (select([&]() { - return *cur != Move::none() && !pos.capture_stage(*cur) && pos.pseudo_legal(*cur); - })) - return *(cur - 1); + case KILLER : + // increment it before so if we aren't stuck here indefinitely ++stage; + + if (killer != ttMove && killer != Move::none() && !pos.capture_stage(killer) + && pos.pseudo_legal(killer)) + return killer; + [[fallthrough]]; case QUIET_INIT : @@ -297,8 +294,7 @@ top: [[fallthrough]]; case GOOD_QUIET : - if (!skipQuiets - && select([&]() { return *cur != refutations[0] && *cur != refutations[1]; })) + if (!skipQuiets && select([&]() { return *cur != killer; })) { if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth)) return *(cur - 1); @@ -327,7 +323,7 @@ top: case BAD_QUIET : if (!skipQuiets) - return select([&]() { return *cur != refutations[0] && *cur != refutations[1]; }); + return select([&]() { return *cur != killer; }); return Move::none(); diff --git a/src/movepick.h b/src/movepick.h index 2564f730..86a2a583 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -160,7 +160,7 @@ class MovePicker { const CapturePieceToHistory*, const PieceToHistory**, const PawnHistory*, - const Move*); + Move); MovePicker(const Position&, Move, Depth, @@ -185,7 +185,7 @@ class MovePicker { const PieceToHistory** continuationHistory; const PawnHistory* pawnHistory; Move ttMove; - ExtMove refutations[2], *cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets; + ExtMove killer, *cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets; int stage; int threshold; Depth depth; diff --git a/src/search.cpp b/src/search.cpp index d3d95eda..a4da8cb0 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -123,7 +123,7 @@ Value value_to_tt(Value v, int ply); Value value_from_tt(Value v, int ply, int r50c); void update_pv(Move* pv, Move move, const Move* childPv); void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); 
-void update_refutations(Stack* ss, Move move); +void update_killer(Stack* ss, Move move); void update_quiet_histories( const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); void update_quiet_stats( @@ -608,9 +608,9 @@ Value Search::Worker::search( assert(0 <= ss->ply && ss->ply < MAX_PLY); - bestMove = Move::none(); - (ss + 2)->killers[0] = (ss + 2)->killers[1] = Move::none(); - (ss + 2)->cutoffCnt = 0; + bestMove = Move::none(); + (ss + 1)->killer = Move::none(); + (ss + 2)->cutoffCnt = 0; Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; ss->statScore = 0; @@ -934,7 +934,7 @@ moves_loop: // When in check, search starts here MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory, - contHist, &thisThread->pawnHistory, ss->killers); + contHist, &thisThread->pawnHistory, ss->killer); value = bestValue; moveCountPruning = false; @@ -1157,7 +1157,7 @@ moves_loop: // When in check, search starts here // Increase reduction for cut nodes (~4 Elo) if (cutNode) r += 2 - (ttData.depth >= depth && ss->ttPv) - + (!ss->ttPv && move != ttData.move && move != ss->killers[0]); + + (!ss->ttPv && move != ttData.move && move != ss->killer); // Increase reduction if ttMove is a capture (~3 Elo) if (ttCapture) @@ -1801,7 +1801,7 @@ void update_all_stats(const Position& pos, // main killer move in previous ply when it gets refuted. 
if (prevSq != SQ_NONE && ((ss - 1)->moveCount == 1 + (ss - 1)->ttHit - || ((ss - 1)->currentMove == (ss - 1)->killers[0])) + || ((ss - 1)->currentMove == (ss - 1)->killer)) && !pos.captured_piece()) update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -quietMoveMalus); @@ -1832,14 +1832,10 @@ void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { } // Updates move sorting heuristics -void update_refutations(Stack* ss, Move move) { +void update_killer(Stack* ss, Move move) { // Update killers - if (ss->killers[0] != move) - { - ss->killers[1] = ss->killers[0]; - ss->killers[0] = move; - } + ss->killer = move; } void update_quiet_histories( @@ -1858,7 +1854,7 @@ void update_quiet_histories( void update_quiet_stats( const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { - update_refutations(ss, move); + update_killer(ss, move); update_quiet_histories(pos, ss, workerThread, move, bonus); } diff --git a/src/search.h b/src/search.h index 57596754..65394bc0 100644 --- a/src/search.h +++ b/src/search.h @@ -65,7 +65,7 @@ struct Stack { int ply; Move currentMove; Move excludedMove; - Move killers[2]; + Move killer; Value staticEval; int statScore; int moveCount; From 42aae5fe8b3f41dac7b0e080ea2e55fa3816d802 Mon Sep 17 00:00:00 2001 From: Andyson007 Date: Thu, 11 Jul 2024 10:09:57 +0200 Subject: [PATCH 184/315] Fixed non UCI compliance print `<empty>` and accept `<empty>` for UCI string options, accepting empty strings as well. Internally use empty strings (`""`).
closes https://github.com/official-stockfish/Stockfish/pull/5474 No functional change --- AUTHORS | 1 + src/engine.cpp | 2 +- src/syzygy/tbprobe.cpp | 2 +- src/ucioption.cpp | 14 +++++++++++--- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/AUTHORS b/AUTHORS index 6957682f..1ac40d87 100644 --- a/AUTHORS +++ b/AUTHORS @@ -20,6 +20,7 @@ Alexander Kure Alexander Pagel (Lolligerhans) Alfredo Menezes (lonfom169) Ali AlZhrani (Cooffe) +Andreas Jan van der Meulen (Andyson007) Andreas Matthies (Matthies) Andrei Vetrov (proukornew) Andrew Grant (AndyGrant) diff --git a/src/engine.cpp b/src/engine.cpp index 2bc0db6a..41b19ac6 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -93,7 +93,7 @@ Engine::Engine(std::string path) : options["UCI_LimitStrength"] << Option(false); options["UCI_Elo"] << Option(1320, 1320, 3190); options["UCI_ShowWDL"] << Option(false); - options["SyzygyPath"] << Option("<empty>", [](const Option& o) { + options["SyzygyPath"] << Option("", [](const Option& o) { Tablebases::init(o); return std::nullopt; }); diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index e2344fda..9b24e700 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -1344,7 +1344,7 @@ void Tablebases::init(const std::string& paths) { MaxCardinality = 0; TBFile::Paths = paths; - if (paths.empty() || paths == "<empty>") + if (paths.empty()) return; // MapB1H1H7[] encodes a square below a1-h8 diagonal to 0..27 diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 1cd028c9..455803cf 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -166,7 +166,9 @@ Option& Option::operator=(const std::string& v) { return *this; } - if (type != "button") + if (type == "string") + currentValue = v == "<empty>" ? 
"" : v; + else if (type != "button") currentValue = v; if (on_change) @@ -188,10 +190,16 @@ std::ostream& operator<<(std::ostream& os, const OptionsMap& om) { const Option& o = it.second; os << "\noption name " << it.first << " type " << o.type; - if (o.type == "string" || o.type == "check" || o.type == "combo") + if (o.type == "check" || o.type == "combo") os << " default " << o.defaultValue; - if (o.type == "spin") + else if (o.type == "string") + { + std::string defaultValue = o.defaultValue.empty() ? "<empty>" : o.defaultValue; + os << " default " << defaultValue; + } + + else if (o.type == "spin") os << " default " << int(stof(o.defaultValue)) << " min " << o.min << " max " << o.max; From 3df09c04d7081d341cb0c5bcc3adc498ba877f9a Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Tue, 9 Jul 2024 13:04:47 -0500 Subject: [PATCH 185/315] Simplify Away Refutation Stage Simplify away killer stage to a constant bonus given to the killer move during quiet move scoring. Passed Non-regression STC (Against then-pending PR #5472): LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 106176 W: 27685 L: 27539 D: 50952 Ptnml(0-2): 410, 12765, 26637, 12821, 455 https://tests.stockfishchess.org/tests/view/668dd0835034141ae5999e8f Passed Non-regression LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 92472 W: 23426 L: 23276 D: 45770 Ptnml(0-2): 55, 10376, 25215, 10544, 46 https://tests.stockfishchess.org/tests/view/669019e45034141ae5999fd2 closes https://github.com/official-stockfish/Stockfish/pull/5476 Bench 1459677 --- src/movepick.cpp | 19 +++++-------------- src/movepick.h | 12 ++++++------ 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index d54bcbc7..7619471f 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -34,7 +34,6 @@ enum Stages { MAIN_TT, CAPTURE_INIT, GOOD_CAPTURE, - KILLER, QUIET_INIT, GOOD_QUIET, BAD_CAPTURE, @@ -97,7 +96,7 @@ MovePicker::MovePicker(const Position& p, continuationHistory(ch), pawnHistory(ph), ttMove(ttm), - 
killer{km, 0}, + killer(km), depth(d) { assert(d > 0); @@ -184,6 +183,8 @@ void MovePicker::score() { m.value += (*continuationHistory[3])[pc][to]; m.value += (*continuationHistory[5])[pc][to]; + m.value += (m == killer) * 65536; + // bonus for checks m.value += bool(pos.check_squares(pt) & to) * 16384; @@ -270,16 +271,6 @@ top: ++stage; [[fallthrough]]; - case KILLER : - // increment it before so if we aren't stuck here indefinitely - ++stage; - - if (killer != ttMove && killer != Move::none() && !pos.capture_stage(killer) - && pos.pseudo_legal(killer)) - return killer; - - [[fallthrough]]; - case QUIET_INIT : if (!skipQuiets) { @@ -294,7 +285,7 @@ top: [[fallthrough]]; case GOOD_QUIET : - if (!skipQuiets && select<Next>([&]() { return *cur != killer; })) + if (!skipQuiets && select<Next>([]() { return true; })) { if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth)) return *(cur - 1); @@ -323,7 +314,7 @@ top: case BAD_QUIET : if (!skipQuiets) - return select<Next>([&]() { return *cur != killer; }); + return select<Next>([]() { return true; }); return Move::none(); diff --git a/src/movepick.h b/src/movepick.h index 86a2a583..c6a5d25a 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -184,12 +184,12 @@ class MovePicker { const CapturePieceToHistory* captureHistory; const PieceToHistory** continuationHistory; const PawnHistory* pawnHistory; - Move ttMove; - ExtMove killer, *cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets; - int stage; - int threshold; - Depth depth; - ExtMove moves[MAX_MOVES]; + Move ttMove, killer; + ExtMove * cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets; + int stage; + int threshold; + Depth depth; + ExtMove moves[MAX_MOVES]; }; } // namespace Stockfish From 024eb6f453e06e37ceca81d5f759b8fe6006b03b Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Thu, 11 Jul 2024 14:07:38 -0700 Subject: [PATCH 186/315] Unify Movepick Initializer Passed Non-regression STC: LLR: 2.94 (-2.94,2.94) 
<-1.75,0.25> Total: 168704 W: 43524 L: 
43455 D: 81725 Ptnml(0-2): 414, 17173, 49076, 17308, 381 https://tests.stockfishchess.org/tests/view/66904b7b5034141ae599a197 Passed Non-regression LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 120294 W: 30473 L: 30364 D: 59457 Ptnml(0-2): 40, 10974, 38032, 11039, 62 https://tests.stockfishchess.org/tests/view/66905b235034141ae599a223 closes https://github.com/official-stockfish/Stockfish/pull/5477 bench 1459677 --- src/movepick.cpp | 25 ++++--------------------- src/movepick.h | 9 +-------- 2 files changed, 5 insertions(+), 29 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 7619471f..55bacf6e 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -98,29 +98,12 @@ MovePicker::MovePicker(const Position& p, ttMove(ttm), killer(km), depth(d) { - assert(d > 0); - stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + !(ttm && pos.pseudo_legal(ttm)); -} + if (pos.checkers()) + stage = EVASION_TT + !(ttm && pos.pseudo_legal(ttm)); -// Constructor for quiescence search -MovePicker::MovePicker(const Position& p, - Move ttm, - Depth d, - const ButterflyHistory* mh, - const CapturePieceToHistory* cph, - const PieceToHistory** ch, - const PawnHistory* ph) : - pos(p), - mainHistory(mh), - captureHistory(cph), - continuationHistory(ch), - pawnHistory(ph), - ttMove(ttm), - depth(d) { - assert(d <= 0); - - stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm)); + else + stage = (depth > 0 ? 
MAIN_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm)); } // Constructor for ProbCut: we generate captures with SEE greater than or equal diff --git a/src/movepick.h b/src/movepick.h index c6a5d25a..92e11de2 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -160,14 +160,7 @@ class MovePicker { const CapturePieceToHistory*, const PieceToHistory**, const PawnHistory*, - Move); - MovePicker(const Position&, - Move, - Depth, - const ButterflyHistory*, - const CapturePieceToHistory*, - const PieceToHistory**, - const PawnHistory*); + Move killer = Move::none()); MovePicker(const Position&, Move, int, const CapturePieceToHistory*); Move next_move(bool skipQuiets = false); From 563d268519885a411e9a3b784875e457aeb26929 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Sat, 13 Jul 2024 00:53:34 +0900 Subject: [PATCH 187/315] Simplify futility_move_count This patch reverts changes from #4032 which was introduced as a speedup. Modern compilers no longer use DIV/IDIV instructions, potentially making the explicit branch perform worse. Since evaluations spend significantly more time now, the impact of the speedup in search diminishes with old compilers as well. GCC 14.1.0 profile-build, x86-64-vnni512 ``` .text:000000014010FEA9 mov ecx, [rsp+3FB8h+var_3F5C] ... 
.text:000000014010FEBD mov r10d, ecx .text:000000014010FEC0 imul r10d, ecx .text:000000014010FEC4 mov ecx, dword ptr [rsp+3FB8h+var_3F44+4] .text:000000014010FEC8 add r10d, 3 .text:000000014010FECC mov r11d, r10d .text:000000014010FECF sar r11d, 1 .text:000000014010FED2 cmp [rsp+3FB8h+var_3EE7], 0 .text:000000014010FEDA cmovnz r11d, r10d ``` LLVM 18.1.18 profile-build, x86-64-vnni512 ``` .text:0000000140001EDC mov [rsp+40h+arg_E0], r13 .text:0000000140001EE4 movsxd rcx, r13d .text:0000000140001EE7 mov rax, rcx .text:0000000140001EEA mov [rsp+40h+arg_B8], rcx .text:0000000140001EF2 imul eax, eax .text:0000000140001EF5 add eax, 3 .text:0000000140001EF8 mov ecx, [rsp+40h+arg_8C] .text:0000000140001EFF shrx eax, eax, ecx .text:0000000140001F04 mov [rsp+40h+arg_190], rax ``` Passed non-regression STC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 109504 W: 28420 L: 28280 D: 52804 Ptnml(0-2): 355, 12326, 29273, 12420, 378 https://tests.stockfishchess.org/tests/view/6690dc095034141ae599c5fe closes https://github.com/official-stockfish/Stockfish/pull/5478 No functional change --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index a4da8cb0..26bee2c1 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -75,7 +75,7 @@ Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorseni } constexpr int futility_move_count(bool improving, Depth depth) { - return improving ? (3 + depth * depth) : (3 + depth * depth) / 2; + return (3 + depth * depth) / (2 - improving); } // Add correctionHistory value to raw staticEval and guarantee evaluation From 930915de901b89c7f7d4bf1495c7e949c0d5e546 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sat, 13 Jul 2024 05:34:09 +0300 Subject: [PATCH 188/315] Decrease delta Decrease delta in aspiration windows - both initial value and quadratic function of previous best value. 
Passed STC: https://tests.stockfishchess.org/tests/view/6691a52ec6827afcdcee1569 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 55456 W: 14449 L: 14107 D: 26900 Ptnml(0-2): 174, 6416, 14193, 6784, 161 Passed LTC: https://tests.stockfishchess.org/tests/view/6691aac1c6827afcdcee1625 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 107940 W: 27530 L: 27065 D: 53345 Ptnml(0-2): 52, 11787, 29840, 12226, 65 closes https://github.com/official-stockfish/Stockfish/pull/5479 bench 1547707 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 26bee2c1..d1e0b321 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -320,7 +320,7 @@ void Search::Worker::iterative_deepening() { // Reset aspiration window starting size Value avg = rootMoves[pvIdx].averageScore; - delta = 9 + avg * avg / 10424; + delta = 5 + avg * avg / 13424; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); From 558abdbe8a1262b7f15f20ccf961b335c4713364 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Fri, 12 Jul 2024 10:10:00 -0400 Subject: [PATCH 189/315] Set best value to futility value after pruned quiet move Passed non-regression STC: https://tests.stockfishchess.org/tests/view/6691592f5034141ae599c68d LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 278496 W: 71818 L: 71865 D: 134813 Ptnml(0-2): 865, 33311, 70978, 33194, 900 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/66918fca5034141ae599e761 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 202986 W: 51048 L: 51013 D: 100925 Ptnml(0-2): 107, 22552, 56133, 22601, 100 closes https://github.com/official-stockfish/Stockfish/pull/5480 bench 1715206 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index d1e0b321..ebae94ef 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1035,7 +1035,7 @@ moves_loop: // When in check, search starts here { if (bestValue <= futilityValue && 
std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && futilityValue < VALUE_TB_WIN_IN_MAX_PLY) - bestValue = (bestValue + futilityValue * 3) / 4; + bestValue = futilityValue; continue; } From 7395d568329f404cd4dc3f4c2fe093059ac2b391 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sat, 13 Jul 2024 14:44:23 +0300 Subject: [PATCH 190/315] bonus calculation for prior countermoves Introduce a new term to the bonus calculation for prior countermoves Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 140896 W: 36545 L: 36079 D: 68272 Ptnml(0-2): 383, 16505, 36217, 16949, 394 https://tests.stockfishchess.org/tests/view/6691c73cc6827afcdcee1816 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 126660 W: 32089 L: 31587 D: 62984 Ptnml(0-2): 63, 13774, 35154, 14276, 63 https://tests.stockfishchess.org/tests/view/6691cdc4c6827afcdcee1930 closes https://github.com/official-stockfish/Stockfish/pull/5483 bench: 1250388 --- src/search.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index ebae94ef..87310301 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1364,12 +1364,13 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (114 * (depth > 5) + 116 * (PvNode || cutNode) + 123 * ((ss - 1)->moveCount > 8) - + 64 * (!ss->inCheck && bestValue <= ss->staticEval - 108) - + 153 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 76)); + int bonus = (138 * (depth > 5) + 58 * (PvNode || cutNode) + 160 * ((ss - 1)->moveCount > 8) + + 84 * (!ss->inCheck && bestValue <= ss->staticEval - 108) + + 153 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 76) + + 32 * (!(ss - 1)->inCheck && bestValue > -(ss - 1)->staticEval + 76)); // Proportional to "how much damage we have to undo" - bonus += std::clamp(-(ss - 1)->statScore / 100, -50, 274); + bonus += std::clamp(-(ss - 1)->statScore / 100, -64, 
300); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus / 100); From 2b37b151dd8c4374353d9e185bddbea1cfe300b0 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Sun, 14 Jul 2024 01:03:49 +0900 Subject: [PATCH 191/315] Use ValueList to represent searched moves array This PR replaces a pair of array and size with existing ValueList class. Removes two local variables in search and two parameters of update_all_stats. Passed non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 227040 W: 58472 L: 58463 D: 110105 Ptnml(0-2): 495, 23572, 65427, 23481, 545 https://tests.stockfishchess.org/tests/view/669299204ff211be9d4e98dc closes https://github.com/official-stockfish/Stockfish/pull/5484 No functional change --- src/search.cpp | 75 +++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 87310301..1d709749 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -128,16 +128,14 @@ void update_quiet_histories( const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); void update_quiet_stats( const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); -void update_all_stats(const Position& pos, - Stack* ss, - Search::Worker& workerThread, - Move bestMove, - Square prevSq, - Move* quietsSearched, - int quietCount, - Move* capturesSearched, - int captureCount, - Depth depth); +void update_all_stats(const Position& pos, + Stack* ss, + Search::Worker& workerThread, + Move bestMove, + Square prevSq, + ValueList<Move, 32>& quietsSearched, + ValueList<Move, 32>& capturesSearched, + Depth depth); } // namespace @@ -554,7 +552,7 @@ Value Search::Worker::search( assert(0 < depth && depth < MAX_PLY); assert(!(PvNode && cutNode)); - Move pv[MAX_PLY + 1], capturesSearched[32], quietsSearched[32]; + Move pv[MAX_PLY + 1]; StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); @@ -563,18 
+561,20 @@ Value 
Search::Worker::search( Depth extension, newDepth; Value bestValue, value, eval, maxValue, probCutBeta; bool givesCheck, improving, priorCapture, opponentWorsening; - bool capture, moveCountPruning, ttCapture; + bool capture, ttCapture; Piece movedPiece; - int moveCount, captureCount, quietCount; + + ValueList<Move, 32> capturesSearched; + ValueList<Move, 32> quietsSearched; // Step 1. Initialize node Worker* thisThread = this; ss->inCheck = pos.checkers(); priorCapture = pos.captured_piece(); Color us = pos.side_to_move(); - moveCount = captureCount = quietCount = ss->moveCount = 0; - bestValue = -VALUE_INFINITE; - maxValue = VALUE_INFINITE; + ss->moveCount = 0; + bestValue = -VALUE_INFINITE; + maxValue = VALUE_INFINITE; // Check for the available remaining time if (is_mainthread()) @@ -936,8 +936,10 @@ moves_loop: // When in check, search starts here MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, &thisThread->pawnHistory, ss->killer); - value = bestValue; - moveCountPruning = false; + value = bestValue; + + int moveCount = 0; + bool moveCountPruning = false; // Step 13. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. @@ -1334,9 +1336,9 @@ moves_loop: // When in check, search starts here if (move != bestMove && moveCount <= 32) { if (capture) - capturesSearched[captureCount++] = move; + capturesSearched.push_back(move); else - quietsSearched[quietCount++] = move; + quietsSearched.push_back(move); } } @@ -1358,8 +1360,7 @@ moves_loop: // When in check, search starts here // If there is a move that produces search value greater than alpha, // we update the stats of searched moves. 
else if (bestMove) - update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, quietCount, - capturesSearched, captureCount, depth); + update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, capturesSearched, depth); // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) @@ -1765,16 +1766,14 @@ void update_pv(Move* pv, Move move, const Move* childPv) { // Updates stats at the end of search() when a bestMove is found -void update_all_stats(const Position& pos, - Stack* ss, - Search::Worker& workerThread, - Move bestMove, - Square prevSq, - Move* quietsSearched, - int quietCount, - Move* capturesSearched, - int captureCount, - Depth depth) { +void update_all_stats(const Position& pos, + Stack* ss, + Search::Worker& workerThread, + Move bestMove, + Square prevSq, + ValueList<Move, 32>& quietsSearched, + ValueList<Move, 32>& capturesSearched, + Depth depth) { CapturePieceToHistory& captureHistory = workerThread.captureHistory; Piece moved_piece = pos.moved_piece(bestMove); @@ -1788,8 +1787,8 @@ void update_all_stats(const Position& pos, update_quiet_stats(pos, ss, workerThread, bestMove, quietMoveBonus); // Decrease stats for all non-best quiet moves - for (int i = 0; i < quietCount; ++i) - update_quiet_histories(pos, ss, workerThread, quietsSearched[i], -quietMoveMalus); + for (Move move : quietsSearched) + update_quiet_histories(pos, ss, workerThread, move, -quietMoveMalus); } else { @@ -1807,11 +1806,11 @@ void update_all_stats(const Position& pos, update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -quietMoveMalus); // Decrease stats for all non-best capture moves - for (int i = 0; i < captureCount; ++i) + for (Move move : capturesSearched) { - moved_piece = pos.moved_piece(capturesSearched[i]); - captured = type_of(pos.piece_on(capturesSearched[i].to_sq())); - captureHistory[moved_piece][capturesSearched[i].to_sq()][captured] << -quietMoveMalus; + moved_piece = 
pos.moved_piece(move); + captured = 
type_of(pos.piece_on(move.to_sq())); + captureHistory[moved_piece][move.to_sq()][captured] << -quietMoveMalus; } } From de2bf1a186ef036a7df06b448f41b00ff62f9322 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sat, 13 Jul 2024 22:18:38 +0300 Subject: [PATCH 192/315] Remove quiet history pruning depth limit This patch removes lmrDepth limit for quiet moves history based pruning. Previously removal of this type of depth limits was considered bad because it was performing bad for matetrack - but with this pruning heuristic this shouldn't be that bad because it's "naturally" depth limited by history threshold and should be completely disabled at depth >= 15 or so. Also this heuristic in previous years was known to scale non-linearly - bigger lmrDepth thresholds were better at longer time controls and removing it completely probably should scale pretty well. Passed STC: https://tests.stockfishchess.org/tests/view/6692b89b4ff211be9d4eab21 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 114464 W: 29675 L: 29545 D: 55244 Ptnml(0-2): 372, 12516, 31329, 12640, 375 Passed LTC: https://tests.stockfishchess.org/tests/view/6692c4554ff211be9d4eab3d LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 67746 W: 17182 L: 17014 D: 33550 Ptnml(0-2): 28, 6993, 19652, 7183, 17 closes https://github.com/official-stockfish/Stockfish/pull/5485 Bench: 1250388 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 1d709749..3c6617eb 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1022,7 +1022,7 @@ moves_loop: // When in check, search starts here + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) - if (lmrDepth < 6 && history < -4165 * depth) + if (history < -4165 * depth) continue; history += 2 * thisThread->mainHistory[us][move.from_to()]; From e443b2459e973c47dbf7e46104bf3bb02ffbb6f7 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sat, 13 Jul 2024 00:40:07 
-0400 Subject: [PATCH 193/315] Separate eval params for smallnet and main net Values found with spsa around 80% of 120k games at 60+0.6: https://tests.stockfishchess.org/tests/view/669205dac6827afcdcee3ea4 Passed STC: https://tests.stockfishchess.org/tests/view/6692928b4ff211be9d4e98a9 LLR: 2.96 (-2.94,2.94) <0.00,2.00> Total: 313696 W: 81107 L: 80382 D: 152207 Ptnml(0-2): 934, 36942, 80363, 37683, 926 Passed LTC: https://tests.stockfishchess.org/tests/view/6692aab54ff211be9d4e9915 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 228420 W: 57903 L: 57190 D: 113327 Ptnml(0-2): 131, 25003, 63243, 25688, 145 closes https://github.com/official-stockfish/Stockfish/pull/5486 bench 1319322 --- src/evaluate.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 44890a36..1cff6478 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -79,11 +79,11 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, } // Blend optimism and eval with nnue complexity - optimism += optimism * nnueComplexity / 457; - nnue -= nnue * nnueComplexity / 19157; + optimism += optimism * nnueComplexity / (smallNet ? 433 : 453); + nnue -= nnue * nnueComplexity / (smallNet ? 18815 : 17864); - int material = 554 * pos.count<PAWN>() + pos.non_pawn_material(); - v = (nnue * (73921 + material) + optimism * (8112 + material)) / 73260; + int material = (smallNet ? 553 : 532) * pos.count<PAWN>() + pos.non_pawn_material(); + v = (nnue * (73921 + material) + optimism * (8112 + material)) / (smallNet ? 68104 : 74715); // Evaluation grain (to get more alpha-beta cuts) with randomization (for robustness) v = (v / 16) * 16 - 1 + (pos.key() & 0x2); From c755bc1a73bb10ec0357ca3c98b6de2eb3d9ad63 Mon Sep 17 00:00:00 2001 From: Guenther Demetz Date: Thu, 18 Jul 2024 09:38:17 +0200 Subject: [PATCH 194/315] Simplify improving condition if we were in check at our previous move we look back until we weren't in check and take the staticEval of that position as reference.
Passed STC: https://tests.stockfishchess.org/tests/view/668ba7b65034141ae5996665 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 74784 W: 19454 L: 19274 D: 36056 Ptnml(0-2): 260, 8874, 18952, 9038, 268 Passed LTC: https://tests.stockfishchess.org/tests/view/668cb2db5034141ae599678b LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 241488 W: 61166 L: 61171 D: 119151 Ptnml(0-2): 190, 27154, 66062, 27147, 191 closes https://github.com/official-stockfish/Stockfish/pull/5492 bench: 1368313 --- src/search.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 3c6617eb..09918bfd 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -713,7 +713,7 @@ Value Search::Worker::search( if (ss->inCheck) { // Skip early pruning when in check - ss->staticEval = eval = VALUE_NONE; + ss->staticEval = eval = (ss - 2)->staticEval; improving = false; goto moves_loop; } @@ -764,12 +764,9 @@ Value Search::Worker::search( // Set up the improving flag, which is true if current static evaluation is // bigger than the previous static evaluation at our turn (if we were in - // check at our previous move we look at static evaluation at move prior to it - // and if we were in check at move prior to it flag is set to true) and is + // check at our previous move we go back until we weren't in check) and is // false otherwise. The improving flag is used in various pruning heuristics. - improving = (ss - 2)->staticEval != VALUE_NONE - ? ss->staticEval > (ss - 2)->staticEval - : (ss - 4)->staticEval != VALUE_NONE && ss->staticEval > (ss - 4)->staticEval; + improving = ss->staticEval > (ss - 2)->staticEval; opponentWorsening = ss->staticEval + (ss - 1)->staticEval > 2; From 7bb45d05faa62463f4c791749907f4c50ceee990 Mon Sep 17 00:00:00 2001 From: yl25946 Date: Mon, 15 Jul 2024 14:55:38 -0500 Subject: [PATCH 195/315] Replace ternary with std::min equivalent and more readable.
closes https://github.com/official-stockfish/Stockfish/pull/5488 No functional change --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 09918bfd..e34aabba 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -90,7 +90,7 @@ Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { int stat_bonus(Depth d) { return std::min(190 * d - 108, 1596); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return (d < 4 ? 736 * d - 268 : 2044); } +int stat_malus(Depth d) { return std::min(736 * d - 268, 2044); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } From 27042fe9497f721abbfccab50ebb6a0641e63b21 Mon Sep 17 00:00:00 2001 From: Dubslow Date: Sat, 6 Jul 2024 22:31:45 -0400 Subject: [PATCH 196/315] Linearize corrHist Passed STC: https://tests.stockfishchess.org/tests/view/66919cdec6827afcdcee146f LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 130656 W: 33579 L: 33461 D: 63616 Ptnml(0-2): 394, 15548, 33318, 15682, 386 Passed VVLTC: https://tests.stockfishchess.org/tests/view/6691acb2c6827afcdcee1645 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 160314 W: 40925 L: 40854 D: 78535 Ptnml(0-2): 12, 14754, 50551, 14831, 9 closes https://github.com/official-stockfish/Stockfish/pull/5489 bench 1380295 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index e34aabba..218d1ce4 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -82,7 +82,7 @@ constexpr int futility_move_count(bool improving, Depth depth) { // does not hit the tablebase range. 
Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; - v += cv * std::abs(cv) / 5073; + v += 66 * cv / 512; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } From c8d8e362fcf58238da07aeb31f6fa029cf9828c6 Mon Sep 17 00:00:00 2001 From: "Shahin M. Shahin" Date: Sun, 14 Jul 2024 21:36:19 +0300 Subject: [PATCH 197/315] Try nullmoves only on cutnodes since master only tries nullmoves on cutNodes already with 99.0224% of the cases running bench, We can try null moves at 100% of cutNodes and achieve such simplification, by making passing false already equivalent to passing !cutNode This is a more correct form of PR #5482 Passed non-regression STC: https://tests.stockfishchess.org/tests/view/66941c044ff211be9d4ebf5f LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 153216 W: 39856 L: 39764 D: 73596 Ptnml(0-2): 590, 18174, 38979, 18284, 581 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/6694e5cd4ff211be9d4ebfdf LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 67842 W: 17178 L: 17004 D: 33660 Ptnml(0-2): 52, 7437, 18759, 7631, 42 closes https://github.com/official-stockfish/Stockfish/pull/5490 bench: 1345400 Co-Authored-By: FauziAkram <11150271+fauziakram@users.noreply.github.com> --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 218d1ce4..c03a30f5 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -791,7 +791,7 @@ Value Search::Worker::search( return beta + (eval - beta) / 3; // Step 9. 
Null move search with verification search (~35 Elo) - if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 14389 + if (cutNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 14389 && eval >= beta && ss->staticEval >= beta - 21 * depth + 390 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) @@ -806,7 +806,7 @@ Value Search::Worker::search( pos.do_null_move(st, tt); - Value nullValue = -search<NonPV>(pos, ss + 1, -beta, -beta + 1, depth - R, !cutNode); + Value nullValue = -search<NonPV>(pos, ss + 1, -beta, -beta + 1, depth - R, false); pos.undo_null_move(); From c2837769e0d43f1195081c2aa97b7028b27dee73 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Wed, 17 Jul 2024 00:15:44 -0400 Subject: [PATCH 198/315] Avoid calculating nnue complexity twice Passed non-regression STC: https://tests.stockfishchess.org/tests/view/6697459d4ff211be9d4ec236 LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 146848 W: 38289 L: 38189 D: 70370 Ptnml(0-2): 503, 16665, 39046, 16649, 561 closes https://github.com/official-stockfish/Stockfish/pull/5493 No functional change --- src/evaluate.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 1cff6478..d0c553ff 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -66,19 +66,18 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, auto [psqt, positional] = smallNet ? 
networks.small.evaluate(pos, &caches.small) : networks.big.evaluate(pos, &caches.big); - Value nnue = (125 * psqt + 131 * positional) / 128; - int nnueComplexity = std::abs(psqt - positional); + Value nnue = (125 * psqt + 131 * positional) / 128; // Re-evaluate the position when higher eval accuracy is worth the time spent if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 227)) { std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big); nnue = (125 * psqt + 131 * positional) / 128; - nnueComplexity = std::abs(psqt - positional); smallNet = false; } // Blend optimism and eval with nnue complexity + int nnueComplexity = std::abs(psqt - positional); optimism += optimism * nnueComplexity / (smallNet ? 433 : 453); nnue -= nnue * nnueComplexity / (smallNet ? 18815 : 17864); From a8401e803d37ec7dbf0650f4d79475214655477e Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Thu, 18 Jul 2024 16:30:42 +0300 Subject: [PATCH 199/315] Adjust bonus to move that caused a fail low This is an elo gainer and simultaneously a minor logical fix to bonuses that caused a fail low. It increases maximum of statscore based subtraction - but disallows negative bonuses. 
Passed STC: https://tests.stockfishchess.org/tests/view/66955e6f4ff211be9d4ec063 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 44640 W: 11805 L: 11472 D: 21363 Ptnml(0-2): 166, 5178, 11335, 5439, 202 Passed LTC: https://tests.stockfishchess.org/tests/view/66963fde4ff211be9d4ec190 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 72288 W: 18478 L: 18082 D: 35728 Ptnml(0-2): 50, 7919, 19825, 8285, 65 closes https://github.com/official-stockfish/Stockfish/pull/5494 Bench: 1477054 --- src/search.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index c03a30f5..945f8b40 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1368,7 +1368,9 @@ moves_loop: // When in check, search starts here + 32 * (!(ss - 1)->inCheck && bestValue > -(ss - 1)->staticEval + 76)); // Proportional to "how much damage we have to undo" - bonus += std::clamp(-(ss - 1)->statScore / 100, -64, 300); + bonus += std::clamp(-(ss - 1)->statScore / 100, -94, 300); + + bonus = std::max(bonus, 0); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus / 100); From 1fb4dc2e0f0dbeddff889bcd75466e4be4fe1ad6 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Fri, 19 Jul 2024 21:26:51 +0200 Subject: [PATCH 200/315] Enable syzygy in the matetrack action now checks correctness of PV lines with TB score. uses 3-4-5 man table bases, downloaded from lichess, which are cached with the appropriate action. 
closes https://github.com/official-stockfish/Stockfish/pull/5500 No functional change --- .github/workflows/matetrack.yml | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/.github/workflows/matetrack.yml b/.github/workflows/matetrack.yml index de65209f..dc8dff8d 100644 --- a/.github/workflows/matetrack.yml +++ b/.github/workflows/matetrack.yml @@ -24,15 +24,31 @@ jobs: with: repository: vondele/matetrack path: matetrack - ref: 20287a1a145f30a166b7ef251eddb611e4e44fbf + ref: 814160f82e6428ed2f6522dc06c2a6fa539cd413 persist-credentials: false - name: matetrack install deps working-directory: matetrack run: pip install -r requirements.txt + - name: cache syzygy + id: cache-syzygy + uses: actions/cache@v4 + with: + path: | + matetrack/3-4-5-wdl/ + matetrack/3-4-5-dtz/ + key: key-syzygy + + - name: download syzygy 3-4-5 if needed + working-directory: matetrack + if: steps.cache-syzygy.outputs.cache-hit != 'true' + run: | + wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-wdl/ + wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-dtz/ + - name: Run matetrack working-directory: matetrack run: | - python matecheck.py --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --nodes 100000 | tee matecheckout.out + python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --nodes 100000 | tee matecheckout.out ! 
grep "issues were detected" matecheckout.out > /dev/null From e57fba7fc9be461cbb97c063b269a1e231cdd284 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sun, 21 Jul 2024 13:15:12 +0200 Subject: [PATCH 201/315] Fix TB PV extension and MultiPV in the case of MultiPV, the first move of the Nth multiPV could actually turn a winning position into a losing one, so don't attempt to correct it. Instead, always perform the first move without correction. Fixes #5505 Closes https://github.com/official-stockfish/Stockfish/pull/5506 No functional change --- src/search.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 945f8b40..435af4b2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1948,8 +1948,12 @@ void syzygy_extend_pv(const OptionsMap& options, std::list sts; + // Step 0, do the rootMove, no correction allowed, as needed for MultiPV in TB. + auto& stRoot = sts.emplace_back(); + pos.do_move(rootMove.pv[0], stRoot); + int ply = 1; + // Step 1, walk the PV to the last position in TB with correct decisive score - int ply = 0; while (size_t(ply) < rootMove.pv.size()) { Move& pvMove = rootMove.pv[ply]; From 703f17975bd9c29172a27f795ca6b5a7d0a32b25 Mon Sep 17 00:00:00 2001 From: Dubslow Date: Thu, 2 May 2024 05:35:15 -0500 Subject: [PATCH 202/315] Remove QS_CHECKS movepick stage Passed STC: https://tests.stockfishchess.org/tests/view/669597cf4ff211be9d4ec147 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 199072 W: 52100 L: 52058 D: 94914 Ptnml(0-2): 829, 23679, 50406, 23865, 757 Passed LTC: https://tests.stockfishchess.org/tests/view/66988f5f4ff211be9d4ec33e LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 119778 W: 30420 L: 30299 D: 59059 Ptnml(0-2): 106, 13293, 32957, 13440, 93 closes https://github.com/official-stockfish/Stockfish/pull/5498 Bench 1499842 --- src/movegen.cpp | 54 ++++++++++++++---------------------------------- src/movegen.h | 1 - src/movepick.cpp | 22 +------------------- src/search.cpp | 10
++------- src/types.h | 3 +-- 5 files changed, 19 insertions(+), 71 deletions(-) diff --git a/src/movegen.cpp b/src/movegen.cpp index e6923067..69b8fe6a 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -75,17 +75,6 @@ ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard ta b2 &= target; } - if constexpr (Type == QUIET_CHECKS) - { - // To make a quiet check, you either make a direct check by pushing a pawn - // or push a blocker pawn that is not on the same file as the enemy king. - // Discovered check promotion has been already generated amongst the captures. - Square ksq = pos.square(Them); - Bitboard dcCandidatePawns = pos.blockers_for_king(Them) & ~file_bb(ksq); - b1 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); - b2 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); - } - while (b1) { Square to = pop_lsb(b1); @@ -158,7 +147,7 @@ ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard ta } -template +template ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) { static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()"); @@ -170,10 +159,6 @@ ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) Square from = pop_lsb(bb); Bitboard b = attacks_bb(from, pos.pieces()) & target; - // To check, you either move freely a blocker or make a direct check. 
- if (Checks && (Pt == QUEEN || !(pos.blockers_for_king(~Us) & from))) - b &= pos.check_squares(Pt); - while (b) *moveList++ = Move(from, pop_lsb(b)); } @@ -187,9 +172,8 @@ ExtMove* generate_all(const Position& pos, ExtMove* moveList) { static_assert(Type != LEGAL, "Unsupported type in generate_all()"); - constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations - const Square ksq = pos.square(Us); - Bitboard target; + const Square ksq = pos.square(Us); + Bitboard target; // Skip generating non-king moves when in double check if (Type != EVASIONS || !more_than_one(pos.checkers())) @@ -197,29 +181,24 @@ ExtMove* generate_all(const Position& pos, ExtMove* moveList) { target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) : Type == NON_EVASIONS ? ~pos.pieces(Us) : Type == CAPTURES ? pos.pieces(~Us) - : ~pos.pieces(); // QUIETS || QUIET_CHECKS + : ~pos.pieces(); // QUIETS moveList = generate_pawn_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); } - if (!Checks || pos.blockers_for_king(~Us) & ksq) - { - Bitboard b = attacks_bb(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target); - if (Checks) - b &= ~attacks_bb(pos.square(~Us)); + Bitboard b = attacks_bb(ksq) & (Type == EVASIONS ? 
~pos.pieces(Us) : target); - while (b) - *moveList++ = Move(ksq, pop_lsb(b)); + while (b) + *moveList++ = Move(ksq, pop_lsb(b)); - if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING)) - for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE}) - if (!pos.castling_impeded(cr) && pos.can_castle(cr)) - *moveList++ = Move::make(ksq, pos.castling_rook_square(cr)); - } + if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING)) + for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE}) + if (!pos.castling_impeded(cr) && pos.can_castle(cr)) + *moveList++ = Move::make(ksq, pos.castling_rook_square(cr)); return moveList; } @@ -231,8 +210,6 @@ ExtMove* generate_all(const Position& pos, ExtMove* moveList) { // Generates all pseudo-legal non-captures and underpromotions // Generates all pseudo-legal check evasions // Generates all pseudo-legal captures and non-captures -// Generates all pseudo-legal non-captures giving check, -// except castling and promotions // // Returns a pointer to the end of the move list. template @@ -251,7 +228,6 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) { template ExtMove* generate(const Position&, ExtMove*); template ExtMove* generate(const Position&, ExtMove*); template ExtMove* generate(const Position&, ExtMove*); -template ExtMove* generate(const Position&, ExtMove*); template ExtMove* generate(const Position&, ExtMove*); diff --git a/src/movegen.h b/src/movegen.h index 5f650d2e..f067f880 100644 --- a/src/movegen.h +++ b/src/movegen.h @@ -31,7 +31,6 @@ class Position; enum GenType { CAPTURES, QUIETS, - QUIET_CHECKS, EVASIONS, NON_EVASIONS, LEGAL diff --git a/src/movepick.cpp b/src/movepick.cpp index 55bacf6e..81384328 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -52,9 +52,7 @@ enum Stages { // generate qsearch moves QSEARCH_TT, QCAPTURE_INIT, - QCAPTURE, - QCHECK_INIT, - QCHECK + QCAPTURE }; // Sort moves in descending order up to and including a given limit. 
@@ -316,24 +314,6 @@ top: return select([&]() { return pos.see_ge(*cur, threshold); }); case QCAPTURE : - if (select([]() { return true; })) - return *(cur - 1); - - // If we found no move and the depth is too low to try checks, then we have finished - if (depth <= DEPTH_QS_NORMAL) - return Move::none(); - - ++stage; - [[fallthrough]]; - - case QCHECK_INIT : - cur = moves; - endMoves = generate(pos, cur); - - ++stage; - [[fallthrough]]; - - case QCHECK : return select([]() { return true; }); } diff --git a/src/search.cpp b/src/search.cpp index 435af4b2..fd9fa6da 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1475,12 +1475,6 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, assert(0 <= ss->ply && ss->ply < MAX_PLY); - // Note that unlike regular search, which stores the literal depth into the - // transposition table, from qsearch we only store the current movegen stage - // as "depth". If in check, we search all evasions and thus store DEPTH_QS_CHECKS. - // Evasions may be quiet, and _CHECKS includes quiets. - Depth qsTtDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NORMAL; - // Step 3. Transposition table lookup posKey = pos.key(); auto [ttHit, ttData, ttWriter] = tt.probe(posKey); @@ -1491,7 +1485,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, pvHit = ttHit && ttData.is_pv; // At non-PV nodes we check for an early TT cutoff - if (!PvNode && ttData.depth >= qsTtDepth + if (!PvNode && ttData.depth >= DEPTH_QS && ttData.value != VALUE_NONE // Can happen when !ttHit or when access race in probe() && (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER))) return ttData.value; @@ -1674,7 +1668,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, // Save gathered info in transposition table. The static evaluation // is saved as it was before adjustment by correction history. 
ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), pvHit, - bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, qsTtDepth, bestMove, + bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, DEPTH_QS, bestMove, unadjustedStaticEval, tt.generation()); assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); diff --git a/src/types.h b/src/types.h index 8a9400bb..b12491d6 100644 --- a/src/types.h +++ b/src/types.h @@ -194,8 +194,7 @@ enum : int { // quiescence search, however, the transposition table entries only store // the current quiescence move generation stage (which should thus compare // lower than any regular search depth). - DEPTH_QS_CHECKS = 0, - DEPTH_QS_NORMAL = -1, + DEPTH_QS = 0, // For transposition table entries where no searching at all was done // (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus // compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET From a2ba3e33628bed0930f50c54a5ae4f30b853b3b8 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sat, 20 Jul 2024 13:34:27 +0300 Subject: [PATCH 203/315] Bonus Simplification This tune removes completely a recently added term. 
Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 61376 W: 16046 L: 15693 D: 29637 Ptnml(0-2): 207, 7132, 15665, 7469, 215 https://tests.stockfishchess.org/tests/view/669512b94ff211be9d4ebffb Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 100662 W: 25474 L: 25020 D: 50168 Ptnml(0-2): 64, 11092, 27581, 11514, 80 https://tests.stockfishchess.org/tests/view/66955f194ff211be9d4ec06a Passed LTC#2: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 28056 W: 7128 L: 6909 D: 14019 Ptnml(0-2): 18, 3084, 7620, 3273, 33 https://tests.stockfishchess.org/tests/view/669a541a4ff211be9d4ec52b closes https://github.com/official-stockfish/Stockfish/pull/5502 bench: 1619438 --- src/search.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index fd9fa6da..f51a7499 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -189,7 +189,7 @@ void Search::Worker::start_searching() { {} // Busy wait for a stop or a ponder reset // Stop the threads if not already stopped (also raise the stop if - // "ponderhit" just reset threads.ponder). 
+ // "ponderhit" just reset threads.ponder) threads.stop = true; // Wait until all threads have finished @@ -1362,20 +1362,19 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (138 * (depth > 5) + 58 * (PvNode || cutNode) + 160 * ((ss - 1)->moveCount > 8) - + 84 * (!ss->inCheck && bestValue <= ss->staticEval - 108) - + 153 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 76) - + 32 * (!(ss - 1)->inCheck && bestValue > -(ss - 1)->staticEval + 76)); + int bonus = (122 * (depth > 5) + 39 * (PvNode || cutNode) + 165 * ((ss - 1)->moveCount > 8) + + 107 * (!ss->inCheck && bestValue <= ss->staticEval - 98) + + 134 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 91)); // Proportional to "how much damage we have to undo" - bonus += std::clamp(-(ss - 1)->statScore / 100, -94, 300); + bonus += std::clamp(-(ss - 1)->statScore / 100, -94, 304); bonus = std::max(bonus, 0); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, - stat_bonus(depth) * bonus / 100); + stat_bonus(depth) * bonus / 116); thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] - << stat_bonus(depth) * bonus / 200; + << stat_bonus(depth) * bonus / 180; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) From 986173264f4c03e3750bd68f904bfdf1152437d4 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sun, 21 Jul 2024 18:52:26 +0300 Subject: [PATCH 204/315] Adding LowestElo and HighestElo constants These values represent the lowest Elo rating in the skill level calculation, and the highest one, but it's not clear from the code where these values come from other than the comment. This should improve code readability and maintainability. It makes the purpose of the values clear and allows for easy modification if the Elo range for skill level calculation changes in the future. 
Moved the Skill struct definition from search.cpp to search.h header file to define the Search::Skill struct, making it accessible from other files. closes https://github.com/official-stockfish/Stockfish/pull/5508 No functional change --- src/engine.cpp | 4 +++- src/search.cpp | 25 ------------------------- src/search.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/engine.cpp b/src/engine.cpp index 41b19ac6..498b7c3e 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -91,7 +91,9 @@ Engine::Engine(std::string path) : options["nodestime"] << Option(0, 0, 10000); options["UCI_Chess960"] << Option(false); options["UCI_LimitStrength"] << Option(false); - options["UCI_Elo"] << Option(1320, 1320, 3190); + options["UCI_Elo"] << Option(Stockfish::Search::Skill::LowestElo, + Stockfish::Search::Skill::LowestElo, + Stockfish::Search::Skill::HighestElo); options["UCI_ShowWDL"] << Option(false); options["SyzygyPath"] << Option("", [](const Option& o) { Tablebases::init(o); diff --git a/src/search.cpp b/src/search.cpp index f51a7499..0d9824b7 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -94,31 +94,6 @@ int stat_malus(Depth d) { return std::min(736 * d - 268, 2044); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } - -// Skill structure is used to implement strength limit. If we have a UCI_Elo, -// we convert it to an appropriate skill level, anchored to the Stash engine. -// This method is based on a fit of the Elo results for games played between -// Stockfish at various skill levels and various versions of the Stash engine. -// Skill 0 .. 
19 now covers CCRL Blitz Elo from 1320 to 3190, approximately -// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2 -struct Skill { - Skill(int skill_level, int uci_elo) { - if (uci_elo) - { - double e = double(uci_elo - 1320) / (3190 - 1320); - level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0); - } - else - level = double(skill_level); - } - bool enabled() const { return level < 20.0; } - bool time_to_pick(Depth depth) const { return depth == 1 + int(level); } - Move pick_best(const RootMoves&, size_t multiPV); - - double level; - Move best = Move::none(); -}; - Value value_to_tt(Value v, int ply); Value value_from_tt(Value v, int ply, int r50c); void update_pv(Move* pv, Move move, const Move* childPv); diff --git a/src/search.h b/src/search.h index 65394bc0..d42b5fba 100644 --- a/src/search.h +++ b/src/search.h @@ -19,6 +19,7 @@ #ifndef SEARCH_H_INCLUDED #define SEARCH_H_INCLUDED +#include #include #include #include @@ -180,6 +181,34 @@ struct InfoIteration { size_t currmovenumber; }; +// Skill structure is used to implement strength limit. If we have a UCI_Elo, +// we convert it to an appropriate skill level, anchored to the Stash engine. +// This method is based on a fit of the Elo results for games played between +// Stockfish at various skill levels and various versions of the Stash engine. +// Skill 0 .. 
19 now covers CCRL Blitz Elo from 1320 to 3190, approximately +// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2 +struct Skill { + // Lowest and highest Elo ratings used in the skill level calculation + constexpr static int LowestElo = 1320; + constexpr static int HighestElo = 3190; + + Skill(int skill_level, int uci_elo) { + if (uci_elo) + { + double e = double(uci_elo - LowestElo) / (HighestElo - LowestElo); + level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0); + } + else + level = double(skill_level); + } + bool enabled() const { return level < 20.0; } + bool time_to_pick(Depth depth) const { return depth == 1 + int(level); } + Move pick_best(const RootMoves&, size_t multiPV); + + double level; + Move best = Move::none(); +}; + // SearchManager manages the search from the main thread. It is responsible for // keeping track of the time, and storing data strictly related to the main thread. class SearchManager: public ISearchManager { From bb4b01e3063d5ad19679d51140d8e9f0599ac538 Mon Sep 17 00:00:00 2001 From: "Shahin M. Shahin" Date: Fri, 19 Jul 2024 13:27:30 +0300 Subject: [PATCH 205/315] Fix TB guard even if beta is below TB range, once we return probcutBeta with beta + 390 we can return wrong TB value, and guard against ttData.value being `VALUE_NONE` closes https://github.com/official-stockfish/Stockfish/pull/5499 bench: 1440277 --- src/search.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 0d9824b7..ca346502 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -894,7 +894,8 @@ moves_loop: // When in check, search starts here // Step 12. 
A small Probcut idea (~4 Elo) probCutBeta = beta + 390; if ((ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 && ttData.value >= probCutBeta - && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) + && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY + && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY) return probCutBeta; const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, From 1e2f0511033945d07e1c8856980ed72cdbe42822 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Wed, 17 Jul 2024 00:03:10 -0400 Subject: [PATCH 206/315] Replace simple eval with psqt in re-eval condition As a result, re-eval depends only on smallnet outputs so an extra call to simple eval can be removed. Passed non-regression STC: https://tests.stockfishchess.org/tests/view/669743054ff211be9d4ec232 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 214912 W: 55801 L: 55777 D: 103334 Ptnml(0-2): 746, 24597, 56760, 24593, 760 https://github.com/official-stockfish/Stockfish/pull/5501 Bench: 1440277 --- src/evaluate.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index d0c553ff..221ccde8 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -59,8 +59,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, assert(!pos.checkers()); - int simpleEval = simple_eval(pos, pos.side_to_move()); - bool smallNet = use_smallnet(pos); + bool smallNet = use_smallnet(pos); int v; auto [psqt, positional] = smallNet ? 
networks.small.evaluate(pos, &caches.small) @@ -69,7 +68,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, Value nnue = (125 * psqt + 131 * positional) / 128; // Re-evaluate the position when higher eval accuracy is worth the time spent - if (smallNet && (nnue * simpleEval < 0 || std::abs(nnue) < 227)) + if (smallNet && (nnue * psqt < 0 || std::abs(nnue) < 227)) { std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big); nnue = (125 * psqt + 131 * positional) / 128; From 985b9fd7b05d1d81be7a1ac90862a5790ee56176 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sat, 20 Jul 2024 12:40:16 -0700 Subject: [PATCH 207/315] Remove Killer Heuristic In Move Ordering Passed Non-regression STC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 80480 W: 20979 L: 20802 D: 38699 Ptnml(0-2): 279, 9610, 20337, 9683, 331 https://tests.stockfishchess.org/tests/view/669c12c14ff211be9d4ec69b Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 77988 W: 19788 L: 19624 D: 38576 Ptnml(0-2): 66, 8605, 21481, 8783, 59 https://tests.stockfishchess.org/tests/view/669d628a4ff211be9d4ec7a8 closes https://github.com/official-stockfish/Stockfish/pull/5511 bench 1367740 --- src/movepick.cpp | 6 +----- src/movepick.h | 5 ++--- src/search.cpp | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 81384328..7bd0252c 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -86,15 +86,13 @@ MovePicker::MovePicker(const Position& p, const ButterflyHistory* mh, const CapturePieceToHistory* cph, const PieceToHistory** ch, - const PawnHistory* ph, - Move km) : + const PawnHistory* ph) : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), pawnHistory(ph), ttMove(ttm), - killer(km), depth(d) { if (pos.checkers()) @@ -164,8 +162,6 @@ void MovePicker::score() { m.value += (*continuationHistory[3])[pc][to]; m.value += (*continuationHistory[5])[pc][to]; - m.value += (m == killer) * 65536; - // bonus for 
checks m.value += bool(pos.check_squares(pt) & to) * 16384; diff --git a/src/movepick.h b/src/movepick.h index 92e11de2..671cbb9c 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -159,8 +159,7 @@ class MovePicker { const ButterflyHistory*, const CapturePieceToHistory*, const PieceToHistory**, - const PawnHistory*, - Move killer = Move::none()); + const PawnHistory*); MovePicker(const Position&, Move, int, const CapturePieceToHistory*); Move next_move(bool skipQuiets = false); @@ -177,7 +176,7 @@ class MovePicker { const CapturePieceToHistory* captureHistory; const PieceToHistory** continuationHistory; const PawnHistory* pawnHistory; - Move ttMove, killer; + Move ttMove; ExtMove * cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets; int stage; int threshold; diff --git a/src/search.cpp b/src/search.cpp index ca346502..233dc4f7 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -907,7 +907,7 @@ moves_loop: // When in check, search starts here MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory, - contHist, &thisThread->pawnHistory, ss->killer); + contHist, &thisThread->pawnHistory); value = bestValue; From 836154acb5ba447a46196a64d6bbab5a5b31ea1b Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Tue, 23 Jul 2024 16:36:49 +0300 Subject: [PATCH 208/315] Introduce pre-qsearch ttmove extensions at pv nodes The idea is that we are about to dive into qsearch (next search depth is <= 0) but since we have the move in transposition table we should extend that move and evaluate it with more precise search - because branch seems important. 
Passed STC: https://tests.stockfishchess.org/tests/view/6699d2564ff211be9d4ec488 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 83104 W: 21789 L: 21401 D: 39914 Ptnml(0-2): 293, 9748, 21128, 10044, 339 Passed LTC: https://tests.stockfishchess.org/tests/view/669b3f1a4ff211be9d4ec602 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 136098 W: 34636 L: 34111 D: 67351 Ptnml(0-2): 105, 14882, 37550, 15407, 105 closes https://github.com/official-stockfish/Stockfish/pull/5512 bench 1526129 --- src/search.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index 233dc4f7..5e260247 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1206,6 +1206,10 @@ moves_loop: // When in check, search starts here (ss + 1)->pv = pv; (ss + 1)->pv[0] = Move::none(); + // Extend move from transposition table if we are about to dive into qsearch. + if (move == ttData.move && ss->ply <= thisThread->rootDepth * 2) + newDepth = std::max(newDepth, 1); + value = -search(pos, ss + 1, -beta, -alpha, newDepth, false); } From b55217fd02d8e5bc0754e5f27bc84df7b01479a6 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Mon, 15 Jul 2024 11:39:02 -0400 Subject: [PATCH 209/315] Update default main net to nn-31337bea577c.nnue Created by updating output weights (256) and biases (8) of the previous main net with values found with spsa around 101k / 120k games at 140+1.4. 264 spsa params: output weights and biases in nn-e8bac1c07a5a.nnue A: 6000, alpha: 0.602, gamma: 0.101 weights: [-127, 127], c_end = 6 biases: [-8192, 8192], c_end = 64 Among the 264 params, 189 weights and all 8 biases were changed. 
Changes in the weights: - mean: -0.111 +/- 3.57 - range: [-8, 8] Found with the same method as: https://github.com/official-stockfish/Stockfish/pull/5459 Due to the original name (nn-ea8c9128c325.nnue) being too similar to the previous main net (nn-e8bac1c07a5a.nnue) and creating confusion, it was renamed by making non-functional changes to the .nnue file the same way as past nets with: https://github.com/linrock/nnue-namer To verify that bench is the same and view the modified non-functional bytes: ``` echo -e "setoption name EvalFile value nn-ea8c9128c325.nnue\nbench" | ./stockfish echo -e "setoption name EvalFile value nn-31337bea577c.nnue\nbench" | ./stockfish cmp -l nn-ea8c9128c325.nnue nn-31337bea577c.nnue diff <(xxd nn-ea8c9128c325.nnue) <(xxd nn-31337bea577c.nnue) ``` Passed STC: https://tests.stockfishchess.org/tests/view/669564154ff211be9d4ec080 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 57280 W: 15139 L: 14789 D: 27352 Ptnml(0-2): 209, 6685, 14522, 6995, 229 Passed LTC: https://tests.stockfishchess.org/tests/view/669694204ff211be9d4ec1b4 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 63030 W: 16093 L: 15720 D: 31217 Ptnml(0-2): 47, 6766, 17516, 7139, 47 closes https://github.com/official-stockfish/Stockfish/pull/5509 bench 1371485 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index 4b5f447e..55838243 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -33,7 +33,7 @@ namespace Eval { // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro or the location where this macro is defined, as it is used // in the Makefile/Fishtest. 
-#define EvalFileDefaultNameBig "nn-e8bac1c07a5a.nnue" +#define EvalFileDefaultNameBig "nn-31337bea577c.nnue" #define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue" namespace NNUE { From 85893ac1cd1933f9d24700026972b278f5a37b9c Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sat, 20 Jul 2024 12:41:56 -0700 Subject: [PATCH 210/315] Simplify Away Killer Condition in Cutnode LMR Passed Non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 42944 W: 11240 L: 11024 D: 20680 Ptnml(0-2): 159, 5056, 10825, 5274, 158 https://tests.stockfishchess.org/tests/view/669c13384ff211be9d4ec69f Passed Non-regression LTC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 163548 W: 41366 L: 41289 D: 80893 Ptnml(0-2): 109, 18246, 45007, 18283, 129 https://tests.stockfishchess.org/tests/view/669cb1254ff211be9d4ec73a closes https://github.com/official-stockfish/Stockfish/pull/5513 Bench: 1178570 --- src/search.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5e260247..e2df475e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1131,8 +1131,7 @@ moves_loop: // When in check, search starts here // Increase reduction for cut nodes (~4 Elo) if (cutNode) - r += 2 - (ttData.depth >= depth && ss->ttPv) - + (!ss->ttPv && move != ttData.move && move != ss->killer); + r += 2 - (ttData.depth >= depth && ss->ttPv) + (!ss->ttPv && move != ttData.move); // Increase reduction if ttMove is a capture (~3 Elo) if (ttCapture) From 607c3e404fc706d09bd3b276ddd563d636823533 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Wed, 24 Jul 2024 18:25:08 +0300 Subject: [PATCH 211/315] Remove unneeded depth tracking in qsearch Since the simplification of quiet checks in qsearch, this depth isn't used by any function at all apart from movepicker, which also doesn't use the passed qsearch depth in any way, so it can be removed. No functional change.
closes https://github.com/official-stockfish/Stockfish/pull/5514 No functional change --- src/search.cpp | 7 +++---- src/search.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index e2df475e..09004ba6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1401,14 +1401,13 @@ moves_loop: // When in check, search starts here // See https://www.chessprogramming.org/Horizon_Effect // and https://www.chessprogramming.org/Quiescence_Search template -Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { +Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) { static_assert(nodeType != Root); constexpr bool PvNode = nodeType == PV; assert(alpha >= -VALUE_INFINITE && alpha < beta && beta <= VALUE_INFINITE); assert(PvNode || (alpha == beta - 1)); - assert(depth <= 0); // Check if we have an upcoming move that draws by repetition (~1 Elo) if (alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply)) @@ -1526,7 +1525,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, // first captures+checks, then captures only (but when in check, we simply search // all evasions). Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; - MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory, + MovePicker mp(pos, ttData.move, DEPTH_QS, &thisThread->mainHistory, &thisThread->captureHistory, contHist, &thisThread->pawnHistory); // Step 5. Loop through all pseudo-legal moves until no moves remain or a beta @@ -1606,7 +1605,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, // Step 7. 
Make and search the move thisThread->nodes.fetch_add(1, std::memory_order_relaxed); pos.do_move(move, st, givesCheck); - value = -qsearch(pos, ss + 1, -beta, -alpha, depth - 1); + value = -qsearch(pos, ss + 1, -beta, -alpha); pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); diff --git a/src/search.h b/src/search.h index d42b5fba..4872a58a 100644 --- a/src/search.h +++ b/src/search.h @@ -291,7 +291,7 @@ class Worker { // Quiescence search function, which is called by the main search template - Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0); + Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta); Depth reduction(bool i, Depth d, int mn, int delta) const; From af802da65b595f67046e97d580479ef1f7b18cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Fri, 26 Jul 2024 11:13:37 +0200 Subject: [PATCH 212/315] Clean up comments for movepicker Remove references to checks in MovePicker comments. Follow-up for https://github.com/official-stockfish/Stockfish/pull/5498 closes https://github.com/official-stockfish/Stockfish/pull/5516 No functional change --- src/movepick.cpp | 26 ++++++++++++-------------- src/movepick.h | 12 ++++++------ src/search.cpp | 14 +++++++------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 7bd0252c..bdc0e4af 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -74,12 +74,10 @@ void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { // Constructors of the MovePicker class. As arguments, we pass information -// to help it return the (presumably) good moves first, to decide which -// moves to return (in the quiescence search, for instance, we only want to -// search captures, promotions, and some checks) and how important a good -// move ordering is at the current node. 
+// to decide which class of moves to emit, to help sorting the (presumably) +// good moves first, and how important move ordering is at the current node. -// MovePicker constructor for the main search +// MovePicker constructor for the main search and for the quiescence search MovePicker::MovePicker(const Position& p, Move ttm, Depth d, @@ -102,8 +100,8 @@ MovePicker::MovePicker(const Position& p, stage = (depth > 0 ? MAIN_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm)); } -// Constructor for ProbCut: we generate captures with SEE greater than or equal -// to the given threshold. +// MovePicker constructor for ProbCut: we generate captures with Static Exchange +// Evaluation (SEE) greater than or equal to the given threshold. MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) : pos(p), captureHistory(cph), @@ -115,9 +113,9 @@ MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceTo + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold)); } -// Assigns a numerical value to each move in a list, used -// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring -// captures with a good history. Quiets moves are ordered using the history tables. +// Assigns a numerical value to each move in a list, used for sorting. +// Captures are ordered by Most Valuable Victim (MVV), preferring captures +// with a good history. Quiets moves are ordered using the history tables. template void MovePicker::score() { @@ -191,7 +189,7 @@ void MovePicker::score() { } // Returns the next move satisfying a predicate function. -// It never returns the TT move. +// This never returns the TT move, as it was emitted before. template Move MovePicker::select(Pred filter) { @@ -208,9 +206,9 @@ Move MovePicker::select(Pred filter) { return Move::none(); } -// Most important method of the MovePicker class. 
It -// returns a new pseudo-legal move every time it is called until there are no more -// moves left, picking the move with the highest score from a list of generated moves. +// This is the most important method of the MovePicker class. We emit one +// new pseudo-legal move on every call until there are no more moves left, +// picking the move with the highest score from a list of generated moves. Move MovePicker::next_move(bool skipQuiets) { auto quiet_threshold = [](Depth d) { return -3560 * d; }; diff --git a/src/movepick.h b/src/movepick.h index 671cbb9c..61f6368e 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -137,12 +137,12 @@ using PawnHistory = Stats using CorrectionHistory = Stats; -// MovePicker class is used to pick one pseudo-legal move at a time from the -// current position. The most important method is next_move(), which returns a -// new pseudo-legal move each time it is called, until there are no moves left, -// when Move::none() is returned. In order to improve the efficiency of the -// alpha-beta algorithm, MovePicker attempts to return the moves which are most -// likely to get a cut-off first. +// The MovePicker class is used to pick one pseudo-legal move at a time from the +// current position. The most important method is next_move(), which emits one +// new pseudo-legal move on every call, until there are no moves left, when +// Move::none() is returned. In order to improve the efficiency of the alpha-beta +// algorithm, MovePicker attempts to return the moves which are most likely to get +// a cut-off first. 
class MovePicker { enum PickType { diff --git a/src/search.cpp b/src/search.cpp index 09004ba6..e8303456 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -20,18 +20,18 @@ #include #include -#include #include #include +#include #include #include #include #include +#include +#include +#include #include #include -#include -#include -#include #include "evaluate.h" #include "misc.h" @@ -1520,11 +1520,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, (ss - 2)->continuationHistory}; + Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; + // Initialize a MovePicker object for the current position, and prepare to search // the moves. We presently use two stages of move generator in quiescence search: - // first captures+checks, then captures only (but when in check, we simply search - // all evasions). - Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; + // captures, or evasions only when in check. MovePicker mp(pos, ttData.move, DEPTH_QS, &thisThread->mainHistory, &thisThread->captureHistory, contHist, &thisThread->pawnHistory); From 2343f71f3ff524e937f81b2922705081f8907980 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sat, 20 Jul 2024 12:41:56 -0700 Subject: [PATCH 213/315] Remove Killers The removal of killers on line 1774 resulted in a substantial decrease in pre-LMR history average, so a negative history fill is applied to counter it. 
Passed Non-regression STC (vs #5513): LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 21984 W: 5886 L: 5645 D: 10453 Ptnml(0-2): 80, 2492, 5628, 2691, 101 https://tests.stockfishchess.org/tests/view/66a095894ff211be9d4ecb9d Passed Non-regression LTC (vs #5513): LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 95430 W: 24141 L: 23995 D: 47294 Ptnml(0-2): 97, 10537, 26298, 10689, 94 https://tests.stockfishchess.org/tests/view/66a11c8d4ff211be9d4ecbf8 closes https://github.com/official-stockfish/Stockfish/pull/5517 Bench: 1660869 --- src/search.cpp | 34 +++++++--------------------------- src/search.h | 1 - 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index e8303456..f20bd4c9 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -98,11 +98,8 @@ Value value_to_tt(Value v, int ply); Value value_from_tt(Value v, int ply, int r50c); void update_pv(Move* pv, Move move, const Move* childPv); void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); -void update_killer(Stack* ss, Move move); void update_quiet_histories( const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); -void update_quiet_stats( - const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); void update_all_stats(const Position& pos, Stack* ss, Search::Worker& workerThread, @@ -222,7 +219,7 @@ void Search::Worker::iterative_deepening() { // Allocate stack with extra size to allow access from (ss - 7) to (ss + 2): // (ss - 7) is needed for update_continuation_histories(ss - 1) which accesses (ss - 6), - // (ss + 2) is needed for initialization of cutOffCnt and killers. + // (ss + 2) is needed for initialization of cutOffCnt. 
Stack stack[MAX_PLY + 10] = {}; Stack* ss = stack + 7; @@ -490,7 +487,7 @@ void Search::Worker::clear() { for (StatsType c : {NoCaptures, Captures}) for (auto& to : continuationHistory[inCheck][c]) for (auto& h : to) - h->fill(-58); + h->fill(-658); for (size_t i = 1; i < reductions.size(); ++i) reductions[i] = int((18.62 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); @@ -584,7 +581,6 @@ Value Search::Worker::search( assert(0 <= ss->ply && ss->ply < MAX_PLY); bestMove = Move::none(); - (ss + 1)->killer = Move::none(); (ss + 2)->cutoffCnt = 0; Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; ss->statScore = 0; @@ -615,7 +611,7 @@ Value Search::Worker::search( { // Bonus for a quiet ttMove that fails high (~2 Elo) if (!ttCapture) - update_quiet_stats(pos, ss, *this, ttData.move, stat_bonus(depth)); + update_quiet_histories(pos, ss, *this, ttData.move, stat_bonus(depth)); // Extra penalty for early quiet moves of // the previous ply (~1 Elo on STC, ~2 Elo on LTC) @@ -1754,7 +1750,7 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - update_quiet_stats(pos, ss, workerThread, bestMove, quietMoveBonus); + update_quiet_histories(pos, ss, workerThread, bestMove, quietMoveBonus); // Decrease stats for all non-best quiet moves for (Move move : quietsSearched) @@ -1767,12 +1763,9 @@ void update_all_stats(const Position& pos, captureHistory[moved_piece][bestMove.to_sq()][captured] << quietMoveBonus; } - // Extra penalty for a quiet early move that was not a TT move or - // main killer move in previous ply when it gets refuted. - if (prevSq != SQ_NONE - && ((ss - 1)->moveCount == 1 + (ss - 1)->ttHit - || ((ss - 1)->currentMove == (ss - 1)->killer)) - && !pos.captured_piece()) + // Extra penalty for a quiet early move that was not a TT move in + // previous ply when it gets refuted. 
+ if (prevSq != SQ_NONE && ((ss - 1)->moveCount == 1 + (ss - 1)->ttHit) && !pos.captured_piece()) update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -quietMoveMalus); // Decrease stats for all non-best capture moves @@ -1802,11 +1795,6 @@ void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { } // Updates move sorting heuristics -void update_killer(Stack* ss, Move move) { - - // Update killers - ss->killer = move; -} void update_quiet_histories( const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { @@ -1820,14 +1808,6 @@ void update_quiet_histories( workerThread.pawnHistory[pIndex][pos.moved_piece(move)][move.to_sq()] << bonus / 2; } -// Updates move sorting heuristics -void update_quiet_stats( - const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { - - update_killer(ss, move); - update_quiet_histories(pos, ss, workerThread, move, bonus); -} - } // When playing with strength handicap, choose the best move among a set of diff --git a/src/search.h b/src/search.h index 4872a58a..bdb63ffd 100644 --- a/src/search.h +++ b/src/search.h @@ -66,7 +66,6 @@ struct Stack { int ply; Move currentMove; Move excludedMove; - Move killer; Value staticEval; int statScore; int moveCount; From 8e560c4fd347514a699bde1931912834047cc835 Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Thu, 25 Jul 2024 14:37:08 +0200 Subject: [PATCH 214/315] Replicate network weights only to used NUMA nodes On a system with multiple NUMA nodes, this patch avoids unneeded replication (e.g. 8x for a single threaded run), reducing memory use in that case. Lazy initialization forced before search.
Passed STC: https://tests.stockfishchess.org/tests/view/66a28c524ff211be9d4ecdd4 LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 691776 W: 179429 L: 179927 D: 332420 Ptnml(0-2): 2573, 79370, 182547, 78778, 2620 closes https://github.com/official-stockfish/Stockfish/pull/5515 No functional change --- src/engine.cpp | 5 +++ src/engine.h | 8 ++-- src/numa.h | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ src/search.cpp | 6 +++ src/search.h | 26 ++++++------ src/thread.cpp | 7 ++++ src/thread.h | 4 ++ 7 files changed, 152 insertions(+), 16 deletions(-) diff --git a/src/engine.cpp b/src/engine.cpp index 498b7c3e..81bb260b 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -204,6 +204,7 @@ void Engine::set_numa_config_from_option(const std::string& o) { // Force reallocation of threads in case affinities need to change. resize_threads(); + threads.ensure_network_replicated(); } void Engine::resize_threads() { @@ -212,6 +213,7 @@ void Engine::resize_threads() { // Reallocate the hash with the new threadpool size set_tt_size(options["Hash"]); + threads.ensure_network_replicated(); } void Engine::set_tt_size(size_t mb) { @@ -234,18 +236,21 @@ void Engine::load_networks() { networks_.small.load(binaryDirectory, options["EvalFileSmall"]); }); threads.clear(); + threads.ensure_network_replicated(); } void Engine::load_big_network(const std::string& file) { networks.modify_and_replicate( [this, &file](NN::Networks& networks_) { networks_.big.load(binaryDirectory, file); }); threads.clear(); + threads.ensure_network_replicated(); } void Engine::load_small_network(const std::string& file) { networks.modify_and_replicate( [this, &file](NN::Networks& networks_) { networks_.small.load(binaryDirectory, file); }); threads.clear(); + threads.ensure_network_replicated(); } void Engine::save_network(const std::pair, std::string> files[2]) { diff --git a/src/engine.h b/src/engine.h index 127f7d7c..f3c78398 100644 --- a/src/engine.h +++ b/src/engine.h @@ -114,10 +114,10 @@ class Engine 
{ StateListPtr states; Square capSq; - OptionsMap options; - ThreadPool threads; - TranspositionTable tt; - NumaReplicated networks; + OptionsMap options; + ThreadPool threads; + TranspositionTable tt; + LazyNumaReplicated networks; Search::SearchManager::UpdateContext updateContext; }; diff --git a/src/numa.h b/src/numa.h index 3de8281d..20d352c9 100644 --- a/src/numa.h +++ b/src/numa.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1136,6 +1137,117 @@ class NumaReplicated: public NumaReplicatedBase { } }; +// We force boxing with a unique_ptr. If this becomes an issue due to added +// indirection we may need to add an option for a custom boxing type. +template +class LazyNumaReplicated: public NumaReplicatedBase { + public: + using ReplicatorFuncType = std::function; + + LazyNumaReplicated(NumaReplicationContext& ctx) : + NumaReplicatedBase(ctx) { + prepare_replicate_from(T{}); + } + + LazyNumaReplicated(NumaReplicationContext& ctx, T&& source) : + NumaReplicatedBase(ctx) { + prepare_replicate_from(std::move(source)); + } + + LazyNumaReplicated(const LazyNumaReplicated&) = delete; + LazyNumaReplicated(LazyNumaReplicated&& other) noexcept : + NumaReplicatedBase(std::move(other)), + instances(std::exchange(other.instances, {})) {} + + LazyNumaReplicated& operator=(const LazyNumaReplicated&) = delete; + LazyNumaReplicated& operator=(LazyNumaReplicated&& other) noexcept { + NumaReplicatedBase::operator=(*this, std::move(other)); + instances = std::exchange(other.instances, {}); + + return *this; + } + + LazyNumaReplicated& operator=(T&& source) { + prepare_replicate_from(std::move(source)); + + return *this; + } + + ~LazyNumaReplicated() override = default; + + const T& operator[](NumaReplicatedAccessToken token) const { + assert(token.get_numa_index() < instances.size()); + ensure_present(token.get_numa_index()); + return *(instances[token.get_numa_index()]); + } + + const T& operator*() const { return *(instances[0]); } + + 
const T* operator->() const { return instances[0].get(); } + + template + void modify_and_replicate(FuncT&& f) { + auto source = std::move(instances[0]); + std::forward(f)(*source); + prepare_replicate_from(std::move(*source)); + } + + void on_numa_config_changed() override { + // Use the first one as the source. It doesn't matter which one we use, + // because they all must be identical, but the first one is guaranteed to exist. + auto source = std::move(instances[0]); + prepare_replicate_from(std::move(*source)); + } + + private: + mutable std::vector> instances; + mutable std::mutex mutex; + + void ensure_present(NumaIndex idx) const { + assert(idx < instances.size()); + + if (instances[idx] != nullptr) + return; + + assert(idx != 0); + + std::unique_lock lock(mutex); + // Check again for races. + if (instances[idx] != nullptr) + return; + + const NumaConfig& cfg = get_numa_config(); + cfg.execute_on_numa_node( + idx, [this, idx]() { instances[idx] = std::make_unique(*instances[0]); }); + } + + void prepare_replicate_from(T&& source) { + instances.clear(); + + const NumaConfig& cfg = get_numa_config(); + if (cfg.requires_memory_replication()) + { + assert(cfg.num_numa_nodes() > 0); + + // We just need to make sure the first instance is there. + // Note that we cannot move here as we need to reallocate the data + // on the correct NUMA node. + cfg.execute_on_numa_node( + 0, [this, &source]() { instances.emplace_back(std::make_unique(source)); }); + + // Prepare others for lazy init. + instances.resize(cfg.num_numa_nodes()); + } + else + { + assert(cfg.num_numa_nodes() == 1); + // We take advantage of the fact that replication is not required + // and reuse the source value, avoiding one copy operation. 
+ instances.emplace_back(std::make_unique(std::move(source))); + } + } +}; + class NumaReplicationContext { public: NumaReplicationContext(NumaConfig&& cfg) : diff --git a/src/search.cpp b/src/search.cpp index f20bd4c9..beafd87d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -127,6 +127,12 @@ Search::Worker::Worker(SharedState& sharedState, clear(); } +void Search::Worker::ensure_network_replicated() { + // Access once to force lazy initialization. + // We do this because we want to avoid initialization during search. + (void) (networks[numaAccessToken]); +} + void Search::Worker::start_searching() { // Non-main threads go directly to iterative_deepening() diff --git a/src/search.h b/src/search.h index bdb63ffd..0f635186 100644 --- a/src/search.h +++ b/src/search.h @@ -131,19 +131,19 @@ struct LimitsType { // The UCI stores the uci options, thread pool, and transposition table. // This struct is used to easily forward data to the Search::Worker class. struct SharedState { - SharedState(const OptionsMap& optionsMap, - ThreadPool& threadPool, - TranspositionTable& transpositionTable, - const NumaReplicated& nets) : + SharedState(const OptionsMap& optionsMap, + ThreadPool& threadPool, + TranspositionTable& transpositionTable, + const LazyNumaReplicated& nets) : options(optionsMap), threads(threadPool), tt(transpositionTable), networks(nets) {} - const OptionsMap& options; - ThreadPool& threads; - TranspositionTable& tt; - const NumaReplicated& networks; + const OptionsMap& options; + ThreadPool& threads; + TranspositionTable& tt; + const LazyNumaReplicated& networks; }; class Worker; @@ -274,6 +274,8 @@ class Worker { bool is_mainthread() const { return threadIdx == 0; } + void ensure_network_replicated(); + // Public because they need to be updatable by the stats ButterflyHistory mainHistory; CapturePieceToHistory captureHistory; @@ -328,10 +330,10 @@ class Worker { Tablebases::Config tbConfig; - const OptionsMap& options; - ThreadPool& threads; - 
TranspositionTable& tt; - const NumaReplicated& networks; + const OptionsMap& options; + ThreadPool& threads; + TranspositionTable& tt; + const LazyNumaReplicated& networks; // Used by NNUE Eval::NNUE::AccumulatorCaches refreshTable; diff --git a/src/thread.cpp b/src/thread.cpp index f17fc4a5..b5d51594 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -102,6 +102,8 @@ void Thread::run_custom_job(std::function f) { cv.notify_one(); } +void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); } + // Thread gets parked here, blocked on the condition variable // when the thread has no work to do. @@ -400,4 +402,9 @@ std::vector ThreadPool::get_bound_thread_count_by_numa_node() const { return counts; } +void ThreadPool::ensure_network_replicated() { + for (auto&& th : threads) + th->ensure_network_replicated(); +} + } // namespace Stockfish diff --git a/src/thread.h b/src/thread.h index 81ca39bb..43e2e142 100644 --- a/src/thread.h +++ b/src/thread.h @@ -83,6 +83,8 @@ class Thread { void clear_worker(); void run_custom_job(std::function f); + void ensure_network_replicated(); + // Thread has been slightly altered to allow running custom jobs, so // this name is no longer correct. However, this class (and ThreadPool) // require further work to make them properly generic while maintaining @@ -146,6 +148,8 @@ class ThreadPool { std::vector get_bound_thread_count_by_numa_node() const; + void ensure_network_replicated(); + std::atomic_bool stop, abortedSearch, increaseDepth; auto cbegin() const noexcept { return threads.cbegin(); } From b976f0a101f80d8b80aa212e92d1cc04b12c6136 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Mon, 29 Jul 2024 12:10:34 +0900 Subject: [PATCH 215/315] Move DotProd code into optimized affine layer This patch moves the DotProd code into the propagation function which has sequential access optimization. To prove the speedup, the comparison is done without the sparse layer. 
With the sparse layer the effect is marginal (GCC 0.3%, LLVM/Clang 0.1%). For both tests, binary is compiled with GCC 14.1. Each test had 50 runs. Sparse layer included: ``` speedup = +0.0030 P(speedup > 0) = 1.0000 ``` Sparse layer excluded: ``` speedup = +0.0561 P(speedup > 0) = 1.0000 ``` closes https://github.com/official-stockfish/Stockfish/pull/5520 No functional change --- src/nnue/layers/affine_transform.h | 58 +++++++++++++++--------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index ad9167c0..59a6149f 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -39,25 +39,26 @@ namespace Stockfish::Eval::NNUE::Layers { +#if defined(USE_SSSE3) || defined(USE_NEON_DOTPROD) + #define ENABLE_SEQ_OPT +#endif + // Fallback implementation for older/other architectures. // Requires the input to be padded to at least 16 values. -#if !defined(USE_SSSE3) +#ifndef ENABLE_SEQ_OPT + template static void affine_transform_non_ssse3(std::int32_t* output, const std::int8_t* weights, const std::int32_t* biases, const std::uint8_t* input) { - #if defined(USE_SSE2) || defined(USE_NEON_DOTPROD) || defined(USE_NEON) + #if defined(USE_SSE2) || defined(USE_NEON) #if defined(USE_SSE2) // At least a multiple of 16, with SSE2. 
constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; const __m128i Zeros = _mm_setzero_si128(); const auto inputVector = reinterpret_cast(input); - #elif defined(USE_NEON_DOTPROD) - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; - const auto inputVector = reinterpret_cast(input); - #elif defined(USE_NEON) constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; const auto inputVector = reinterpret_cast(input); @@ -91,16 +92,8 @@ static void affine_transform_non_ssse3(std::int32_t* output, sum = _mm_add_epi32(sum, sum_second_32); output[i] = _mm_cvtsi128_si32(sum); - #elif defined(USE_NEON_DOTPROD) - int32x4_t sum = {biases[i]}; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) - { - sum = vdotq_s32(sum, inputVector[j], row[j]); - } - output[i] = vaddvq_s32(sum); - #elif defined(USE_NEON) + int32x4_t sum = {biases[i]}; const auto row = reinterpret_cast(&weights[offset]); for (IndexType j = 0; j < NumChunks; ++j) @@ -127,7 +120,8 @@ static void affine_transform_non_ssse3(std::int32_t* output, } #endif } -#endif + +#endif // !ENABLE_SEQ_OPT template class AffineTransform { @@ -162,7 +156,7 @@ class AffineTransform { } static constexpr IndexType get_weight_index(IndexType i) { -#if defined(USE_SSSE3) +#ifdef ENABLE_SEQ_OPT return get_weight_index_scrambled(i); #else return i; @@ -190,29 +184,28 @@ class AffineTransform { // Forward propagation void propagate(const InputType* input, OutputType* output) const { -#if defined(USE_SSSE3) +#ifdef ENABLE_SEQ_OPT if constexpr (OutputDimensions > 1) { - #if defined(USE_AVX512) using vec_t = __m512i; - #define vec_setzero _mm512_setzero_si512 #define vec_set_32 _mm512_set1_epi32 #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 - #define vec_hadd Simd::m512_hadd #elif defined(USE_AVX2) using vec_t = __m256i; - #define vec_setzero _mm256_setzero_si256 #define vec_set_32 _mm256_set1_epi32 #define 
vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 - #define vec_hadd Simd::m256_hadd #elif defined(USE_SSSE3) using vec_t = __m128i; - #define vec_setzero _mm_setzero_si128 #define vec_set_32 _mm_set1_epi32 #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 - #define vec_hadd Simd::m128_hadd + #elif defined(USE_NEON_DOTPROD) + using vec_t = int32x4_t; + #define vec_set_32 vdupq_n_s32 + #define vec_add_dpbusd_32(acc, a, b) \ + Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \ + vreinterpretq_s8_s32(b)) #endif static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); @@ -242,28 +235,33 @@ class AffineTransform { for (IndexType k = 0; k < NumRegs; ++k) outptr[k] = acc[k]; - #undef vec_setzero #undef vec_set_32 #undef vec_add_dpbusd_32 - #undef vec_hadd } else if constexpr (OutputDimensions == 1) { - // We cannot use AVX512 for the last layer because there are only 32 inputs // and the buffer is not padded to 64 elements. #if defined(USE_AVX2) using vec_t = __m256i; - #define vec_setzero _mm256_setzero_si256 + #define vec_setzero() _mm256_setzero_si256() #define vec_set_32 _mm256_set1_epi32 #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 #define vec_hadd Simd::m256_hadd #elif defined(USE_SSSE3) using vec_t = __m128i; - #define vec_setzero _mm_setzero_si128 + #define vec_setzero() _mm_setzero_si128() #define vec_set_32 _mm_set1_epi32 #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 #define vec_hadd Simd::m128_hadd + #elif defined(USE_NEON_DOTPROD) + using vec_t = int32x4_t; + #define vec_setzero() vdupq_n_s32(0) + #define vec_set_32 vdupq_n_s32 + #define vec_add_dpbusd_32(acc, a, b) \ + Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \ + vreinterpretq_s8_s32(b)) + #define vec_hadd Simd::neon_m128_hadd #endif const auto inputVector = reinterpret_cast(input); From ae9e55cf530081afea34216b86b6eb5d9b2b5661 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Mon, 29 Jul 2024 00:04:03 -0700 Subject: [PATCH 
216/315] Simplify Cutnode Reduction Passed Non-regression STC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 143968 W: 37439 L: 37333 D: 69196 Ptnml(0-2): 521, 17228, 36456, 17182, 597 https://tests.stockfishchess.org/tests/view/66a73f9f4ff211be9d4ed27f Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 198954 W: 50384 L: 50345 D: 98225 Ptnml(0-2): 201, 22360, 54347, 22337, 232 https://tests.stockfishchess.org/tests/view/66a906e94ff211be9d4ed423 closes https://github.com/official-stockfish/Stockfish/pull/5526 bench 1277466 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index beafd87d..5f87f28f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1133,7 +1133,7 @@ moves_loop: // When in check, search starts here // Increase reduction for cut nodes (~4 Elo) if (cutNode) - r += 2 - (ttData.depth >= depth && ss->ttPv) + (!ss->ttPv && move != ttData.move); + r += 2 - (ttData.depth >= depth && ss->ttPv) + !ss->ttPv; // Increase reduction if ttMove is a capture (~3 Elo) if (ttCapture) From d626af5c3a3781a8f63e1b9b7104ec69aaa4c726 Mon Sep 17 00:00:00 2001 From: Disservin Date: Sat, 17 Aug 2024 22:07:42 +0200 Subject: [PATCH 217/315] Fix failing CI for MacOS 13 GCC 11 closes https://github.com/official-stockfish/Stockfish/pull/5540 No functional change --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 836555e6..8d209a4f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -148,7 +148,7 @@ jobs: - name: Download required macOS packages if: runner.os == 'macOS' - run: brew install coreutils + run: brew install coreutils gcc@11 - name: Setup msys and install required packages if: runner.os == 'Windows' From bc80ece6c78cafb3a89d3abcec6c71a517c29f2d Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Tue, 30 Jul 2024 01:33:56 -0700 Subject: [PATCH 218/315] Improve
Comments for Pairwise Multiplication Optimization closes https://github.com/official-stockfish/Stockfish/pull/5524 no functional change --- src/nnue/nnue_feature_transformer.h | 78 ++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index ad0fb1b4..2f74dcae 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -352,26 +352,68 @@ class FeatureTransformer { reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); vec_t* out = reinterpret_cast(output + offset); + // Per the NNUE architecture, here we want to multiply pairs of + // clipped elements and divide the product by 128. To do this, + // we can naively perform min/max operation to clip each of the + // four int16 vectors, mullo pairs together, then pack them into + // one int8 vector. However, there exists a faster way. + + // The idea here is to use the implicit clipping from packus to + // save us two vec_max_16 instructions. This clipping works due + // to the fact that any int16 integer below zero will be zeroed + // on packus. + + // Consider the case where the second element is negative. + // If we do standard clipping, that element will be zero, which + // means our pairwise product is zero. If we perform packus and + // remove the lower-side clip for the second element, then our + // product before packus will be negative, and is zeroed on pack. + // The two operation produce equivalent results, but the second + // one (using packus) saves one max operation per pair. + + // But here we run into a problem: mullo does not preserve the + // sign of the multiplication. We can get around this by doing + // mulhi, which keeps the sign. But that requires an additional + // tweak. + + // mulhi cuts off the last 16 bits of the resulting product, + // which is the same as performing a rightward shift of 16 bits. 
+ // We can use this to our advantage. Recall that we want to + // divide the final product by 128, which is equivalent to a + // 7-bit right shift. Intuitively, if we shift the clipped + // value left by 9, and perform mulhi, which shifts the product + // right by 16 bits, then we will net a right shift of 7 bits. + // However, this won't work as intended. Since we clip the + // values to have a maximum value of 127, shifting it by 9 bits + // might occupy the signed bit, resulting in some positive + // values being interpreted as negative after the shift. + + // There is a way, however, to get around this limitation. When + // loading the network, scale accumulator weights and biases by + // 2. To get the same pairwise multiplication result as before, + // we need to divide the product by 128 * 2 * 2 = 512, which + // amounts to a right shift of 9 bits. So now we only have to + // shift left by 7 bits, perform mulhi (shifts right by 16 bits) + // and net a 9 bit right shift. Since we scaled everything by + // two, the values are clipped at 127 * 2 = 254, which occupies + // 8 bits. Shifting it by 7 bits left will no longer occupy the + // signed bit, so we are safe. + + // Note that on NEON processors, we shift left by 6 instead + // because the instruction "vqdmulhq_s16" also doubles the + // return value after the multiplication, adding an extra shift + // to the left by 1, so we compensate by shifting less before + // the multiplication. + + constexpr int shift = + #if defined(USE_SSE2) + 7; + #else + 6; + #endif + for (IndexType j = 0; j < NumOutputChunks; ++j) { - // What we want to do is multiply inputs in a pairwise manner - // (after clipping), and then shift right by 9. Instead, we - // shift left by 7, and use mulhi, stripping the bottom 16 bits, - // effectively shifting right by 16, resulting in a net shift - // of 9 bits. 
We use mulhi because it maintains the sign of - // the multiplication (unlike mullo), allowing us to make use - // of packus to clip 2 of the inputs, resulting in a save of 2 - // "vec_max_16" calls. A special case is when we use NEON, - // where we shift left by 6 instead, because the instruction - // "vqdmulhq_s16" also doubles the return value after the - // multiplication, adding an extra shift to the left by 1, so - // we compensate by shifting less before the multiplication. - - #if defined(USE_SSE2) - constexpr int shift = 7; - #else - constexpr int shift = 6; - #endif const vec_t sum0a = vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero), shift); const vec_t sum0b = From a75717ede14df4526a0990466e7b10d00e89c9ff Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Fri, 2 Aug 2024 13:23:44 -0700 Subject: [PATCH 219/315] Simplify Post-LMR Continuation History Updates Passed Non-regression STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 55520 W: 14625 L: 14420 D: 26475 Ptnml(0-2): 247, 6522, 14007, 6747, 237 https://tests.stockfishchess.org/tests/view/66ad40874ff211be9d4ed8f7 Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 216168 W: 54561 L: 54540 D: 107067 Ptnml(0-2): 196, 24212, 59244, 24239, 193 https://tests.stockfishchess.org/tests/view/66aeac954ff211be9d4eda03 closes https://github.com/official-stockfish/Stockfish/pull/5530 bench 1418263 --- src/search.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5f87f28f..3c7fc253 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1181,9 +1181,7 @@ moves_loop: // When in check, search starts here value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth, !cutNode); // Post LMR continuation history updates (~1 Elo) - int bonus = value <= alpha ? -stat_malus(newDepth) - : value >= beta ? stat_bonus(newDepth) - : 0; + int bonus = value >= beta ? 
stat_bonus(newDepth) : -stat_malus(newDepth); update_continuation_histories(ss, movedPiece, move.to_sq(), bonus); } From 4995792a6c1dfca13e3fafc8e55577854b4de1dd Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sat, 10 Aug 2024 13:06:28 +0300 Subject: [PATCH 220/315] Simplify cutnode reduction formula Passed STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 137994 W: 34705 L: 34603 D: 68686 Ptnml(0-2): 124, 15371, 37903, 15477, 122 https://tests.stockfishchess.org/tests/view/66aeb74b4ff211be9d4eda10 Passed LTC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 131456 W: 34148 L: 34031 D: 63277 Ptnml(0-2): 506, 15571, 33465, 15672, 514 https://tests.stockfishchess.org/tests/view/66ae258b4ff211be9d4ed95d closes https://github.com/official-stockfish/Stockfish/pull/5531 Bench: 1261995 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 3c7fc253..35f203b9 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1133,7 +1133,7 @@ moves_loop: // When in check, search starts here // Increase reduction for cut nodes (~4 Elo) if (cutNode) - r += 2 - (ttData.depth >= depth && ss->ttPv) + !ss->ttPv; + r += 2 - (ttData.depth >= depth && ss->ttPv); // Increase reduction if ttMove is a capture (~3 Elo) if (ttCapture) From 5d81071953bd304e57613140b694b03a8241eac9 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Thu, 8 Aug 2024 01:33:42 -0400 Subject: [PATCH 221/315] Update default main net to nn-1111cefa1111.nnue Created from 2 distinct spsa tunes of the latest main net (nn-31337bea577c.nnue) and applying the params to the prior main net (nn-e8bac1c07a5a.nnue). 
This effectively reverts the modifications to output weights and biases in https://github.com/official-stockfish/Stockfish/pull/5509 SPSA: A: 6000, alpha: 0.602, gamma: 0.101 1st - 437 feature transformer biases where values are < 25 54k / 120k games at 180+1.8 https://tests.stockfishchess.org/tests/view/66af98ac4ff211be9d4edad0 nn-808259761cca.nnue 2nd - 208 L2 weights where values are zero 112k / 120k games at 180+1.8 https://tests.stockfishchess.org/tests/view/66b0c3074ff211be9d4edbe5 nn-a56cb8c3d477.nnue When creating the above 2 nets (nn-808259761cca.nnue, nn-a56cb8c3d477.nnue), spsa params were unintentionally applied to nn-e8bac1c07a5a.nnue rather than nn-31337bea577c.nnue due to an issue in a script that creates nets by applying spsa results to base nets. Since they both passed STC and were neutral or slightly positive at LTC, they were combined to see if the elo from each set of params was additive. The 2 nets can be merged on top of nn-e8bac1c07a5a.nnue with: https://github.com/linrock/nnue-tools/blob/90942d3/spsa/combine_nnue.py ``` python3 combine_nnue.py \ nn-e8bac1c07a5a.nnue \ nn-808259761cca.nnue \ nn-a56cb8c3d477.nnue ``` Merging yields nn-87caa003fc6a.nnue which was renamed to nn-1111cefa1111.nnue with an updated nnue-namer around 10x faster than before by: - using a prefix trie for efficient prefix matches - modifying 4 non-functional bytes near the end of the file instead of 2 https://github.com/linrock/nnue-namer Thanks to @MinetaS for pointing out in #nnue-dev what the non-functional bytes are: L3 is 32, 4 bytes for biases, 32 bytes for weights. (fc_2) So -38 and -37 are technically -2 and -1 of fc_1 (type AffineTransform<30, 32>) And since InputDimension is padded to 32 there are total 32 of 2 adjacent bytes padding. So yes, it's non-functional whatever values are there. It's possible to tweak bytes at -38 - 32 * N and -37 - 32 * N given N = 0 ... 31 The net renamed with the new method passed non-regression STC vs. 
the original net: https://tests.stockfishchess.org/tests/view/66c0f0a821503a509c13b332 To print the spsa params with nnue-pytorch: ``` import features from serialize import NNUEReader feature_set = features.get_feature_set_from_name("HalfKAv2_hm") with open("nn-31337bea577c.nnue", "rb") as f: model = NNUEReader(f, feature_set).model c_end = 16 for i,ft_bias in enumerate(model.input.bias.data[:3072]): value = int(ft_bias * 254) if abs(value) < 25: print(f"ftB[{i}],{value},-1024,1024,{c_end},0.0020") c_end = 6 for i in range(8): for j in range(32): for k in range(30): value = int(model.layer_stacks.l2.weight.data[32 * i + j, k] * 64) if value == 0: print(f"twoW[{i}][{j}][{k}],{value},-127,127,{c_end},0.0020") ``` New params found with the same method as: https://github.com/official-stockfish/Stockfish/pull/5459 Passed STC: https://tests.stockfishchess.org/tests/view/66b4d4464ff211be9d4edf6e LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 136416 W: 35753 L: 35283 D: 65380 Ptnml(0-2): 510, 16159, 34416, 16597, 526 Passed LTC: https://tests.stockfishchess.org/tests/view/66b76e814ff211be9d4ee1cc LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 159336 W: 40753 L: 40178 D: 78405 Ptnml(0-2): 126, 17497, 43864, 18038, 143 closes https://github.com/official-stockfish/Stockfish/pull/5534 bench 1613043 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index 55838243..c9041efb 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -33,7 +33,7 @@ namespace Eval { // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro or the location where this macro is defined, as it is used // in the Makefile/Fishtest. 
-#define EvalFileDefaultNameBig "nn-31337bea577c.nnue" +#define EvalFileDefaultNameBig "nn-1111cefa1111.nnue" #define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue" namespace NNUE { From 175021721c6042896f2b35beb251edcf107d9dc2 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Tue, 13 Aug 2024 19:02:02 -0700 Subject: [PATCH 222/315] Simplify bestMove promotion Passed Non-regression STC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 216768 W: 56240 L: 56217 D: 104311 Ptnml(0-2): 794, 24900, 56956, 24957, 777 https://tests.stockfishchess.org/tests/view/66bc11324ff211be9d4ee78b Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 44970 W: 11391 L: 11199 D: 22380 Ptnml(0-2): 44, 4596, 13002, 4810, 33 https://tests.stockfishchess.org/tests/view/66bdbb1b4ff211be9d4eec5a closes https://github.com/official-stockfish/Stockfish/pull/5535 bench: 1613043 --- src/search.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 35f203b9..ec8a9dd2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1274,9 +1274,9 @@ moves_loop: // When in check, search starts here // In case we have an alternative move equal in eval to the current bestmove, // promote it to bestmove by pretending it just exceeds alpha (but not beta). 
- int inc = (value == bestValue && (int(nodes) & 15) == 0 - && ss->ply + 2 + ss->ply / 32 >= thisThread->rootDepth - && std::abs(value) + 1 < VALUE_TB_WIN_IN_MAX_PLY); + int inc = + (value == bestValue && (int(nodes) & 15) == 0 && ss->ply + 2 >= thisThread->rootDepth + && std::abs(value) + 1 < VALUE_TB_WIN_IN_MAX_PLY); if (value + inc > bestValue) { From 87814d2fb869166f1bdbcb23893aca57729602fe Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Tue, 13 Aug 2024 19:07:08 -0700 Subject: [PATCH 223/315] Simplify doShallowerSearch Passed Non-regression STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 242336 W: 62657 L: 62663 D: 117016 Ptnml(0-2): 941, 28949, 61418, 28895, 965 https://tests.stockfishchess.org/tests/view/66bc13c34ff211be9d4ee794 Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 128100 W: 32503 L: 32390 D: 63207 Ptnml(0-2): 106, 14319, 35113, 14380, 132 https://tests.stockfishchess.org/tests/view/66bdbb304ff211be9d4eec5d closes https://github.com/official-stockfish/Stockfish/pull/5537 bench 1586246 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index ec8a9dd2..34190596 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1173,7 +1173,7 @@ moves_loop: // When in check, search starts here // Adjust full-depth search based on LMR results - if the result was // good enough search deeper, if it was bad enough search shallower. 
const bool doDeeperSearch = value > (bestValue + 35 + 2 * newDepth); // (~1 Elo) - const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo) + const bool doShallowerSearch = value < bestValue + 8; // (~2 Elo) newDepth += doDeeperSearch - doShallowerSearch; From 6cf7f300acc88df277da64b754c436462f48dadf Mon Sep 17 00:00:00 2001 From: Nonlinear2 <131959792+Nonlinear2@users.noreply.github.com> Date: Fri, 16 Aug 2024 22:54:05 +0200 Subject: [PATCH 224/315] Simplify stand pat adjustment Remove && !PvNode condition for stand pat adjustment in quiescence search. Passed non-regression STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 108544 W: 28228 L: 28085 D: 52231 Ptnml(0-2): 389, 12902, 27554, 13031, 396 https://tests.stockfishchess.org/tests/view/66bb402e4ff211be9d4ee688 Passed non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 193014 W: 48796 L: 48751 D: 95467 Ptnml(0-2): 188, 21481, 53116, 21542, 180 https://tests.stockfishchess.org/tests/view/66bc78774ff211be9d4ee88f closes https://github.com/official-stockfish/Stockfish/pull/5538 Bench 1787360 --- AUTHORS | 1 + src/search.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 1ac40d87..3201e7a8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -171,6 +171,7 @@ Niklas Fiekas (niklasf) Nikolay Kostov (NikolayIT) Norman Schmidt (FireFather) notruck +Nour Berakdar (Nonlinear) Ofek Shochat (OfekShochat, ghostway) Ondrej Mosnáček (WOnder93) Ondřej Mišina (AndrovT) diff --git a/src/search.cpp b/src/search.cpp index 34190596..b062aa46 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1502,7 +1502,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) // Stand pat.
Return immediately if static value is at least beta if (bestValue >= beta) { - if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && !PvNode) + if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY) bestValue = (3 * bestValue + beta) / 4; if (!ss->ttHit) ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, From d275bf9643768cbf6472977f4262220e6c1c1bb5 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Thu, 15 Aug 2024 16:24:56 -0600 Subject: [PATCH 225/315] Introduce Fail Low History Bonus When a node fails low, give TT move a small bonus 1/4 of normal value. Passed STC: LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 92384 W: 24094 L: 23691 D: 44599 Ptnml(0-2): 323, 10852, 23465, 11203, 349 https://tests.stockfishchess.org/tests/view/66be80794ff211be9d4eed68 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 114660 W: 29260 L: 28778 D: 56622 Ptnml(0-2): 97, 12506, 31653, 12966, 108 https://tests.stockfishchess.org/tests/view/66bf63ee4ff211be9d4eeef0 closes https://github.com/official-stockfish/Stockfish/pull/5539 bench 1463003 --- src/search.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index b062aa46..c94d3c42 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1361,6 +1361,10 @@ moves_loop: // When in check, search starts here << stat_bonus(depth) * bonus / 25; } + // Bonus when search fails low and there is a TT move + else if (moveCount > 1 && ttData.move && (cutNode || PvNode)) + thisThread->mainHistory[us][ttData.move.from_to()] << stat_bonus(depth) / 4; + if (PvNode) bestValue = std::min(bestValue, maxValue); From 9fb58328e363d84e3cf720b018e639b139ba95c2 Mon Sep 17 00:00:00 2001 From: Taras Vuk <117687515+TarasVuk@users.noreply.github.com> Date: Sun, 18 Aug 2024 16:11:06 +0200 Subject: [PATCH 226/315] Tweak late move extensions Allow late move extensions only for PV and cut nodes. 
Passed STC: LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 44512 W: 11688 L: 11355 D: 21469 Ptnml(0-2): 167, 5180, 11229, 5513, 167 https://tests.stockfishchess.org/tests/view/66c0509d4ff211be9d4ef10e Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 152970 W: 39026 L: 38466 D: 75478 Ptnml(0-2): 102, 16792, 42164, 17298, 129 https://tests.stockfishchess.org/tests/view/66c0994d21503a509c13b2b6 closes https://github.com/official-stockfish/Stockfish/pull/5541 bench: 1484730 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index c94d3c42..531fc42f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1163,7 +1163,7 @@ moves_loop: // When in check, search starts here // beyond the first move depth. // To prevent problems when the max value is less than the min value, // std::clamp has been replaced by a more robust implementation. - Depth d = std::max(1, std::min(newDepth - r, newDepth + 1)); + Depth d = std::max(1, std::min(newDepth - r, newDepth + (PvNode || cutNode))); value = -search(pos, ss + 1, -(alpha + 1), -alpha, d, true); From a0597b1281f22dc90dbcc2f52f4a1a0e2bc09f96 Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Mon, 26 Aug 2024 15:22:22 +0200 Subject: [PATCH 227/315] Forcibly split NUMA nodes on Windows split by processor groups due to Windows' thread scheduler issues. fixes #5551 closes https://github.com/official-stockfish/Stockfish/pull/5552 No functional change --- src/numa.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/numa.h b/src/numa.h index 20d352c9..db835922 100644 --- a/src/numa.h +++ b/src/numa.h @@ -582,7 +582,21 @@ class NumaConfig { // still no way to set thread affinity spanning multiple processor groups. // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support // We also do this is if need to force old API for some reason.
- if (STARTUP_USE_OLD_AFFINITY_API) + // + // 2024-08-26: It appears that we need to actually always force this behaviour. + // While Windows allows this to work now, such assignments have bad interaction + // with the scheduler - in particular it still prefers scheduling on the thread's + // "primary" node, even if it means scheduling SMT processors first. + // See https://github.com/official-stockfish/Stockfish/issues/5551 + // See https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups + // + // Each process is assigned a primary group at creation, and by default all + // of its threads' primary group is the same. Each thread's ideal processor + // is in the thread's primary group, so threads will preferentially be + // scheduled to processors on their primary group, but they are able to + // be scheduled to processors on any other group. + // + // used to be guarded by if (STARTUP_USE_OLD_AFFINITY_API) { NumaConfig splitCfg = empty(); From 54def6f7eb7c411cba9c1e31ff4074757f64e826 Mon Sep 17 00:00:00 2001 From: Taras Vuk <117687515+TarasVuk@users.noreply.github.com> Date: Fri, 23 Aug 2024 08:37:52 +0200 Subject: [PATCH 228/315] rename !(PvNode || cutNode) to allNode Passed STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 108992 W: 28178 L: 28039 D: 52775 Ptnml(0-2): 356, 12428, 28762, 12621, 329 https://tests.stockfishchess.org/tests/view/66c73a51bf8c9d8780fda532 closes https://github.com/official-stockfish/Stockfish/pull/5549 No functional change --- src/search.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 531fc42f..0b6756a7 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -509,6 +509,7 @@ Value Search::Worker::search( constexpr bool PvNode = nodeType != NonPV; constexpr bool rootNode = nodeType == Root; + const bool allNode = !(PvNode || cutNode); // Dive into quiescence search when the depth reaches zero if (depth <= 0) @@ -1141,7 +1142,7 @@ moves_loop: // When in 
check, search starts here // Increase reduction if next ply has a lot of fail high (~5 Elo) if ((ss + 1)->cutoffCnt > 3) - r += 1 + !(PvNode || cutNode); + r += 1 + allNode; // For first picked move (ttMove) reduce reduction, but never allow // reduction to go below 0 (~3 Elo) @@ -1163,7 +1164,7 @@ moves_loop: // When in check, search starts here // beyond the first move depth. // To prevent problems when the max value is less than the min value, // std::clamp has been replaced by a more robust implementation. - Depth d = std::max(1, std::min(newDepth - r, newDepth + (PvNode || cutNode))); + Depth d = std::max(1, std::min(newDepth - r, newDepth + !allNode)); value = -search(pos, ss + 1, -(alpha + 1), -alpha, d, true); @@ -1341,7 +1342,7 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (122 * (depth > 5) + 39 * (PvNode || cutNode) + 165 * ((ss - 1)->moveCount > 8) + int bonus = (122 * (depth > 5) + 39 * !allNode + 165 * ((ss - 1)->moveCount > 8) + 107 * (!ss->inCheck && bestValue <= ss->staticEval - 98) + 134 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 91)); @@ -1362,7 +1363,7 @@ moves_loop: // When in check, search starts here } // Bonus when search fails low and there is a TT move - else if (moveCount > 1 && ttData.move && (cutNode || PvNode)) + else if (moveCount > 1 && ttData.move && !allNode) thisThread->mainHistory[us][ttData.move.from_to()] << stat_bonus(depth) / 4; if (PvNode) From 451044202a49fbbbe908b49fab323d70fab333e7 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Thu, 22 Aug 2024 18:27:16 +0300 Subject: [PATCH 229/315] Simpler formula for ss->cutoffCnt update closes https://github.com/official-stockfish/Stockfish/pull/5548 No functional change --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 0b6756a7..ad2c35e7 100644 --- a/src/search.cpp +++ 
b/src/search.cpp @@ -1292,7 +1292,7 @@ moves_loop: // When in check, search starts here if (value >= beta) { - ss->cutoffCnt += 1 + !ttData.move - (extension >= 2); + ss->cutoffCnt += !ttData.move + (extension < 2); assert(value >= beta); // Fail high break; } From ab00c24c7e547a06e3277fc5ae7f66980532224c Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sat, 31 Aug 2024 15:42:29 +0200 Subject: [PATCH 230/315] Fix some of the tests due to https://github.com/official-stockfish/Stockfish/issues/5185 some CI tests are skipped. This patch fixes a few tests that need updating. closes https://github.com/official-stockfish/Stockfish/pull/5560 No functional change --- tests/instrumented.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/instrumented.sh b/tests/instrumented.sh index cb5a3a9f..5fc6ca9a 100755 --- a/tests/instrumented.sh +++ b/tests/instrumented.sh @@ -177,25 +177,25 @@ cat << EOF > game.exp send "ucinewgame\n" send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" - send "go depth 18\n" + send "go depth 18 searchmoves c6d7\n" expect "score mate 2 * pv c6d7 * f7f5" expect "bestmove c6d7" send "ucinewgame\n" send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" - send "go mate 2\n" + send "go mate 2 searchmoves c6d7\n" expect "score mate 2 * pv c6d7" expect "bestmove c6d7" send "ucinewgame\n" send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" - send "go nodes 10000\n" + send "go nodes 500000 searchmoves c6d7\n" expect "score mate 2 * pv c6d7 * f7f5" expect "bestmove c6d7" send "ucinewgame\n" send "position fen 1NR2B2/5p2/5p2/1p1kpp2/1P2rp2/2P1pB2/2P1P1K1/8 b - - \n" - send "go depth 18\n" + send "go depth 27\n" expect "score mate -2" expect "pv d5e6 c8d8" expect "bestmove d5e6" @@ -257,7 +257,7 @@ cat << EOF > syzygy.exp expect "Stockfish" send "uci\n" send "setoption name SyzygyPath value ../tests/syzygy/\n" - expect "info string Found 35 tablebases" + expect "info string Found 35 WDL and 35 DTZ 
tablebase files (up to 4-man)." send "bench 128 1 8 default depth\n" expect "Nodes searched :" send "ucinewgame\n" From 2054add23cf234f302c67709efc0d265c5a98eae Mon Sep 17 00:00:00 2001 From: "Robert Nurnberg @ elitebook" Date: Tue, 3 Sep 2024 08:20:06 +0200 Subject: [PATCH 231/315] Update the WDL model updates the internal WDL model, using data from 2.6M games played by the revisions since 9fb5832. https://github.com/official-stockfish/Stockfish/pull/5565 No functional change --- src/uci.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uci.cpp b/src/uci.cpp index 9b60680d..c94f8b91 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -339,8 +339,8 @@ WinRateParams win_rate_params(const Position& pos) { double m = std::clamp(material, 17, 78) / 58.0; // Return a = p_a(material) and b = p_b(material), see github.com/official-stockfish/WDL_model - constexpr double as[] = {-41.25712052, 121.47473115, -124.46958843, 411.84490997}; - constexpr double bs[] = {84.92998051, -143.66658718, 80.09988253, 49.80869370}; + constexpr double as[] = {-37.45051876, 121.19101539, -132.78783573, 420.70576692}; + constexpr double bs[] = {90.26261072, -137.26549898, 71.10130540, 51.35259597}; double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; From 38e0cc7b909796c1a71d9c07b636698b69420975 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sat, 31 Aug 2024 16:00:59 +0200 Subject: [PATCH 232/315] Update Top CPU Contributors to the status as of Aug 31st 2024. 
closes https://github.com/official-stockfish/Stockfish/pull/5561 No functional change --- Top CPU Contributors.txt | 193 +++++++++++++++++++++------------------ 1 file changed, 104 insertions(+), 89 deletions(-) diff --git a/Top CPU Contributors.txt b/Top CPU Contributors.txt index 11636e84..3d8c5236 100644 --- a/Top CPU Contributors.txt +++ b/Top CPU Contributors.txt @@ -1,106 +1,109 @@ -Contributors to Fishtest with >10,000 CPU hours, as of 2024-02-24. +Contributors to Fishtest with >10,000 CPU hours, as of 2024-08-31. Thank you! Username CPU Hours Games played ------------------------------------------------------------------ -noobpwnftw 39302472 3055513453 -technologov 20845762 994893444 -linrock 8616428 560281417 +noobpwnftw 40428649 3164740143 +technologov 23581394 1076895482 +vdv 19425375 718302718 +linrock 10034115 643194527 mlang 3026000 200065824 -okrout 2332151 222639518 -pemo 1800019 60274069 +okrout 2572676 237511408 +pemo 1836785 62226157 dew 1689162 100033738 -TueRens 1474943 75121774 -grandphish2 1463002 91616949 -JojoM 1109702 72927902 -olafm 978631 71037944 -sebastronomy 939955 44920556 +TueRens 1648780 77891164 +sebastronomy 1468328 60859092 +grandphish2 1466110 91776075 +JojoM 1130625 73666098 +olafm 1067009 74807270 tvijlbrief 796125 51897690 -gvreuls 711320 49142318 +oz 781847 53910686 +rpngn 768460 49812975 +gvreuls 751085 52177668 mibere 703840 46867607 -oz 646268 46293638 -rpngn 572571 38928563 -leszek 531858 39316505 -cw 518116 34894291 +leszek 566598 42024615 +cw 519601 34988161 fastgm 503862 30260818 CSU_Dynasty 468784 31385034 -ctoks 434591 28520597 -maximmasiutin 429983 27066286 +maximmasiutin 439192 27893522 +ctoks 435148 28541909 crunchy 427414 27371625 bcross 415724 29061187 +robal 371112 24642270 +mgrabiak 367963 26464704 velislav 342588 22140902 -mgrabiak 338763 23999170 +ncfish1 329039 20624527 Fisherman 327231 21829379 -robal 299836 20213182 Dantist 296386 18031762 -ncfish1 267604 17881149 +tolkki963 262050 22049676 +Sylvain27 
255595 8864404 nordlandia 249322 16420192 +Fifis 237657 13065577 marrco 234581 17714473 -tolkki963 233490 19773930 +Calis007 217537 14450582 glinscott 208125 13277240 drabel 204167 13930674 mhoram 202894 12601997 bking_US 198894 11876016 -Calis007 188631 12795784 Thanar 179852 12365359 -Fifis 176209 10638245 -vdv 175544 9904472 +javran 169679 13481966 +armo9494 162863 10937118 spams 157128 10319326 -DesolatedDodo 156659 10210328 -armo9494 155355 10566898 +DesolatedDodo 156683 10211206 +Wencey 152308 8375444 sqrt2 147963 9724586 +vdbergh 140311 9225125 jcAEie 140086 10603658 -vdbergh 139746 9172061 CoffeeOne 137100 5024116 malala 136182 8002293 xoto 133759 9159372 +Dubslow 129614 8519312 davar 129023 8376525 DMBK 122960 8980062 dsmith 122059 7570238 -javran 121564 10144656 +CypressChess 120784 8672620 +sschnee 120526 7547722 +maposora 119734 10749710 amicic 119661 7938029 -sschnee 118107 7389266 -Wolfgang 114616 8070494 +Wolfgang 115713 8159062 Data 113305 8220352 BrunoBanani 112960 7436849 -Wencey 111502 5991676 -cuistot 108503 7006992 -CypressChess 108331 7759788 +markkulix 112897 9133168 +cuistot 109802 7121030 skiminki 107583 7218170 +sterni1971 104431 5938282 MaZePallas 102823 6633619 -sterni1971 100532 5880772 sunu 100167 7040199 zeryl 99331 6221261 thirdlife 99156 2245320 ElbertoOne 99028 7023771 -Dubslow 98600 6903242 -markkulix 97010 7643900 -bigpen0r 94809 6529203 +megaman7de 98456 6675076 +Goatminola 96765 8257832 +bigpen0r 94825 6529241 brabos 92118 6186135 Maxim 90818 3283364 psk 89957 5984901 -megaman7de 88822 6052132 racerschmacer 85805 6122790 -maposora 85710 7778146 Vizvezdenec 83761 5344740 0x3C33 82614 5271253 +szupaw 82495 7151686 BRAVONE 81239 5054681 nssy 76497 5259388 +cody 76126 4492126 jromang 76106 5236025 +MarcusTullius 76103 5061991 +woutboat 76072 6022922 +Spprtr 75977 5252287 teddybaer 75125 5407666 Pking_cda 73776 5293873 -yurikvelo 73516 5036928 -MarcusTullius 71053 4803477 +yurikvelo 73611 5046822 +Mineta 71130 4711422 Bobo1239 70579 
4794999 solarlight 70517 5028306 dv8silencer 70287 3883992 -Spprtr 69646 4806763 -Mineta 66325 4537742 manap 66273 4121774 -szupaw 65468 5669742 tinker 64333 4268790 qurashee 61208 3429862 -woutboat 59496 4906352 AGI 58195 4329580 robnjr 57262 4053117 Freja 56938 3733019 @@ -108,39 +111,45 @@ MaxKlaxxMiner 56879 3423958 ttruscott 56010 3680085 rkl 55132 4164467 jmdana 54697 4012593 +notchris 53936 4184018 renouve 53811 3501516 -notchris 52433 4044590 finfish 51360 3370515 eva42 51272 3599691 eastorwest 51117 3454811 -Goatminola 51004 4432492 rap 49985 3219146 pb00067 49733 3298934 GPUex 48686 3684998 OuaisBla 48626 3445134 ronaldjerum 47654 3240695 biffhero 46564 3111352 -oryx 45533 3539290 +oryx 45639 3546530 VoyagerOne 45476 3452465 speedycpu 43842 3003273 jbwiebe 43305 2805433 Antihistamine 41788 2761312 mhunt 41735 2691355 +jibarbosa 41640 4145702 homyur 39893 2850481 gri 39871 2515779 +DeepnessFulled 39020 3323102 Garf 37741 2999686 SC 37299 2731694 -Sylvain27 36520 1467082 +Gaster319 37118 3279678 +naclosagc 36562 1279618 csnodgrass 36207 2688994 -Gaster319 35655 3149442 strelock 34716 2074055 +gopeto 33717 2245606 EthanOConnor 33370 2090311 slakovv 32915 2021889 -gopeto 31884 2076712 +jojo2357 32890 2826662 +shawnxu 32019 2802552 Gelma 31771 1551204 +vidar808 31560 1351810 kdave 31157 2198362 manapbk 30987 1810399 -ZacHFX 30551 2238078 +ZacHFX 30966 2272416 +TataneSan 30713 1513402 +votoanthuan 30691 2460856 Prcuvu 30377 2170122 anst 30301 2190091 jkiiski 30136 1904470 @@ -149,14 +158,15 @@ hyperbolic.tom 29840 2017394 chuckstablers 29659 2093438 Pyafue 29650 1902349 belzedar94 28846 1811530 -votoanthuan 27978 2285818 -shawnxu 27438 2465810 +mecevdimitar 27610 1721382 chriswk 26902 1868317 xwziegtm 26897 2124586 achambord 26582 1767323 +somethingintheshadows 26496 2186404 Patrick_G 26276 1801617 yorkman 26193 1992080 -Ulysses 25397 1701264 +srowen 25743 1490684 +Ulysses 25413 1702830 Jopo12321 25227 1652482 SFTUser 25182 1675689 nabildanial 25068 1531665 @@ 
-164,66 +174,69 @@ Sharaf_DG 24765 1786697 rodneyc 24376 1416402 jsys14 24297 1721230 agg177 23890 1395014 -srowen 23842 1342508 +AndreasKrug 23754 1890115 Ente 23752 1678188 -jojo2357 23479 2061238 JanErik 23408 1703875 Isidor 23388 1680691 Norabor 23371 1603244 +WoodMan777 23253 2023048 +Nullvalue 23155 2022752 cisco2015 22920 1763301 Zirie 22542 1472937 -Nullvalue 22490 1970374 -AndreasKrug 22485 1769491 team-oh 22272 1636708 Roady 22220 1465606 MazeOfGalious 21978 1629593 -sg4032 21947 1643353 +sg4032 21950 1643373 +tsim67 21747 1330880 ianh2105 21725 1632562 +Skiff84 21711 1014212 xor12 21628 1680365 dex 21612 1467203 nesoneg 21494 1463031 user213718 21454 1404128 +Serpensin 21452 1790510 sphinx 21211 1384728 -qoo_charly_cai 21135 1514907 +qoo_charly_cai 21136 1514927 +IslandLambda 21062 1220838 jjoshua2 21001 1423089 Zake9298 20938 1565848 horst.prack 20878 1465656 +fishtester 20729 1348888 0xB00B1ES 20590 1208666 -Serpensin 20487 1729674 -Dinde 20440 1292390 +ols 20477 1195945 +Dinde 20459 1292774 j3corre 20405 941444 Adrian.Schmidt123 20316 1281436 wei 19973 1745989 -fishtester 19617 1257388 +teenychess 19819 1762006 rstoesser 19569 1293588 eudhan 19274 1283717 vulcan 18871 1729392 +wizardassassin 18795 1376884 Karpovbot 18766 1053178 -WoodMan777 18556 1628264 jundery 18445 1115855 +mkstockfishtester 18350 1690676 ville 17883 1384026 chris 17698 1487385 purplefishies 17595 1092533 dju 17414 981289 -ols 17291 1042003 iisiraider 17275 1049015 -Skiff84 17111 950248 DragonLord 17014 1162790 +Karby 17008 1013160 +pirt 16965 1271519 redstone59 16842 1461780 -Karby 16839 1010124 Alb11747 16787 1213990 -pirt 16493 1237199 Naven94 16414 951718 -wizardassassin 16392 1148672 +scuzzi 16115 994341 IgorLeMasson 16064 1147232 -scuzzi 15757 968735 ako027ako 15671 1173203 +infinigon 15285 965966 Nikolay.IT 15154 1068349 Andrew Grant 15114 895539 OssumOpossum 14857 1007129 LunaticBFF57 14525 1190310 enedene 14476 905279 -IslandLambda 14393 958196 +Hjax 14394 1005013 
bpfliegel 14233 882523 YELNAMRON 14230 1128094 mpx86 14019 759568 @@ -233,54 +246,56 @@ Nesa92 13806 1116101 crocogoat 13803 1117422 joster 13710 946160 mbeier 13650 1044928 -Hjax 13535 915487 +Pablohn26 13552 1088532 +wxt9861 13550 1312306 Dark_wizzie 13422 1007152 Rudolphous 13244 883140 Machariel 13010 863104 -infinigon 12991 943216 +nalanzeyu 12996 232590 mabichito 12903 749391 +Jackfish 12895 868928 thijsk 12886 722107 AdrianSA 12860 804972 Flopzee 12698 894821 +whelanh 12682 266404 mschmidt 12644 863193 korposzczur 12606 838168 -tsim67 12570 890180 -Jackfish 12553 836958 fatmurphy 12547 853210 -Oakwen 12503 853105 +Oakwen 12532 855759 +icewulf 12447 854878 SapphireBrand 12416 969604 deflectooor 12386 579392 modolief 12386 896470 -TataneSan 12358 609332 Farseer 12249 694108 +Hongildong 12201 648712 pgontarz 12151 848794 dbernier 12103 860824 -FormazChar 11989 907809 +szczur90 12035 942376 +FormazChar 12019 910409 +rensonthemove 11999 971993 stocky 11954 699440 -somethingintheshadows 11940 989472 -MooTheCow 11892 776126 +MooTheCow 11923 779432 3cho 11842 1036786 -whelanh 11557 245188 +ckaz 11792 732276 infinity 11470 727027 aga 11412 695127 torbjo 11395 729145 Thomas A. 
Anderson 11372 732094 savage84 11358 670860 +Def9Infinity 11345 696552 d64 11263 789184 ali-al-zhrani 11245 779246 -ckaz 11170 680866 +ImperiumAeternum 11155 952000 snicolet 11106 869170 dapper 11032 771402 Ethnikoi 10993 945906 Snuuka 10938 435504 -Karmatron 10859 678058 +Karmatron 10871 678306 basepi 10637 744851 -jibarbosa 10628 857100 Cubox 10621 826448 -mecevdimitar 10609 787318 +gerbil 10519 971688 michaelrpg 10509 739239 -Def9Infinity 10427 686978 OIVAS7572 10420 995586 -wxt9861 10412 1013864 Garruk 10365 706465 dzjp 10343 732529 +RickGroszkiewicz 10263 990798 From e0bfc4b69bbe928d6f474a46560bcc3b3f6709aa Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Tue, 3 Sep 2024 18:07:22 +0200 Subject: [PATCH 233/315] Stockfish 17 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Official release version of Stockfish 17 Bench: 1484730 --- Stockfish 17 Today we have the pleasure to announce a new major release of Stockfish. As always, you can freely download it at https://stockfishchess.org/download and use it in the GUI of your choice. Don’t forget to join our Discord server[1] to get in touch with the community of developers and users of the project! *Quality of chess play* In tests against Stockfish 16, this release brings an Elo gain of up to 46 points[2] and wins up to 4.5 times more game pairs[3] than it loses. In practice, high-quality moves are now found in less time, with a user upgrading from Stockfish 14 being able to analyze games at least 6 times[4] faster with Stockfish 17 while maintaining roughly the same quality. During this development period, Stockfish won its 9th consecutive first place in the main league of the Top Chess Engine Championship (TCEC)[5], and the 24th consecutive first place in the main events (bullet, blitz, and rapid) of the Computer Chess Championship (CCC)[6]. 
*Update highlights* *Improved engine lines* This release introduces principal variations (PVs) that are more informative for mate and decisive table base (TB) scores. In both cases, the PV will contain all moves up to checkmate. For mate scores, the PV shown is the best variation known to the engine at that point, while for table base wins, it follows, based on the TB, a sequence of moves that preserves the game outcome to checkmate. *NUMA performance optimization* For high-end computers with multiple CPUs (typically a dual-socket architecture with 100+ cores), this release automatically improves performance with a `NumaPolicy` setting that optimizes non-uniform memory access (NUMA). Although typical consumer hardware will not benefit, speedups of up to 2.8x[7] have been measured. *Shoutouts* *ChessDB* During the past 1.5 years, hundreds of cores have been continuously running Stockfish to grow a database of analyzed positions. This chess cloud database[8] now contains well over 45 billion positions, providing excellent coverage of all openings and commonly played lines. This database is already integrated into GUIs such as En Croissant[9] and Nibbler[10], which access it through the public API. *Leela Chess Zero* Generally considered to be the strongest GPU engine, it continues to provide open data which is essential for training our NNUE networks. They released version 0.31.1[11] of their engine a few weeks ago, check it out! *Website redesign* Our website has undergone a redesign in recent months, most notably in our home page[12], now featuring a darker color scheme and a more modern aesthetic, while still maintaining its core identity. We hope you'll like it as much as we do! *Thank you* The Stockfish project builds on a thriving community of enthusiasts (thanks everybody!) who contribute their expertise, time, and resources to build a free and open-source chess engine that is robust, widely available, and very strong. 
We would like to express our gratitude for the 11k stars[13] that light up our GitHub project! Thank you for your support and encouragement – your recognition means a lot to us. We invite our chess fans to join the Fishtest testing framework[14] to contribute compute resources needed for development. Programmers can contribute to the project either directly to Stockfish[15] (C++), to Fishtest[16] (HTML, CSS, JavaScript, and Python), to our trainer nnue-pytorch[17] (C++ and Python), or to our website[18] (HTML, CSS/SCSS, and JavaScript). The Stockfish team [1] https://discord.gg/GWDRS3kU6R [2] https://tests.stockfishchess.org/tests/view/66d738ba9de3e7f9b33d159a [3] https://tests.stockfishchess.org/tests/view/66d738f39de3e7f9b33d15a0 [4] https://github.com/official-stockfish/Stockfish/wiki/Useful-data#equivalent-time-odds-and-normalized-game-pair-elo [5] https://en.wikipedia.org/wiki/Stockfish_(chess)#Top_Chess_Engine_Championship [6] https://en.wikipedia.org/wiki/Stockfish_(chess)#Chess.com_Computer_Chess_Championship [7] https://github.com/official-stockfish/Stockfish/pull/5285 [8] https://chessdb.cn/queryc_en/ [9] https://encroissant.org/ [10] https://github.com/rooklift/nibbler [11] https://github.com/LeelaChessZero/lc0/releases/tag/v0.31.1 [12] https://stockfishchess.org/ [13] https://github.com/official-stockfish/Stockfish/stargazers [14] https://github.com/official-stockfish/fishtest/wiki/Running-the-worker [15] https://github.com/official-stockfish/Stockfish [16] https://github.com/official-stockfish/fishtest [17] https://github.com/official-stockfish/nnue-pytorch [18] https://github.com/official-stockfish/stockfish-web --- src/misc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.cpp b/src/misc.cpp index 664ab4b8..91cdbc4d 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -38,7 +38,7 @@ namespace Stockfish { namespace { // Version number or dev. 
-constexpr std::string_view version = "dev"; +constexpr std::string_view version = "17"; // Our fancy logging facility. The trick here is to replace cin.rdbuf() and // cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We From f4ba7ce67a0a4b77d9d641b4010dfc73f3b534b2 Mon Sep 17 00:00:00 2001 From: Disservin Date: Mon, 9 Sep 2024 17:07:36 +0200 Subject: [PATCH 234/315] Restore development closes https://github.com/official-stockfish/Stockfish/pull/5580 No functional change --- src/misc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.cpp b/src/misc.cpp index 91cdbc4d..664ab4b8 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -38,7 +38,7 @@ namespace Stockfish { namespace { // Version number or dev. -constexpr std::string_view version = "17"; +constexpr std::string_view version = "dev"; // Our fancy logging facility. The trick here is to replace cin.rdbuf() and // cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We From 4fb04eb3df9279cd7b8c3d43dbf1916b3c74bea3 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Fri, 30 Aug 2024 14:09:24 +0300 Subject: [PATCH 235/315] Simplify history bonus After we recently added the disallowance for negative bonuses, it is no longer necessary to keep the max comparison in the previous step. 
Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 72000 W: 18820 L: 18637 D: 34543 Ptnml(0-2): 267, 8489, 18276, 8730, 238 https://tests.stockfishchess.org/tests/view/66ce132cbf8c9d8780fdabe7 Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 67452 W: 17136 L: 16961 D: 33355 Ptnml(0-2): 35, 7489, 18519, 7632, 51 https://tests.stockfishchess.org/tests/view/66cf6ad49de3e7f9b33d1010 closes https://github.com/official-stockfish/Stockfish/pull/5554 Bench: 1147012 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index ad2c35e7..9d950c0e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1347,7 +1347,7 @@ moves_loop: // When in check, search starts here + 134 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 91)); // Proportional to "how much damage we have to undo" - bonus += std::clamp(-(ss - 1)->statScore / 100, -94, 304); + bonus += std::min(-(ss - 1)->statScore / 100, 304); bonus = std::max(bonus, 0); From ddc9f48bc3e0b1d22b2d1259d5d45d4640e0374d Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Fri, 23 Aug 2024 17:13:49 -0700 Subject: [PATCH 236/315] Introduce Material Correction History Idea from Caissa (https://github.com/Witek902/Caissa) chess engine. Add a secondary correction history indexed by the material key of a position. The material key is the zobrist hash representing the number of pieces left in a position. 
Passed STC: LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 189472 W: 49360 L: 48813 D: 91299 Ptnml(0-2): 666, 22453, 47953, 22996, 668 https://tests.stockfishchess.org/tests/view/66cbddafbf8c9d8780fda9f1 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 224190 W: 57022 L: 56312 D: 110856 Ptnml(0-2): 197, 24723, 61540, 25443, 192 https://tests.stockfishchess.org/tests/view/66cd529bbf8c9d8780fdab4c closes https://github.com/official-stockfish/Stockfish/pull/5556 Bench: 1462697 --- src/movepick.h | 30 ++++++++++++++++++++++-------- src/search.cpp | 11 ++++++++--- src/search.h | 11 ++++++----- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/src/movepick.h b/src/movepick.h index 61f6368e..651091b0 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -34,14 +34,15 @@ namespace Stockfish { -constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 -constexpr int CORRECTION_HISTORY_SIZE = 16384; // has to be a power of 2 -constexpr int CORRECTION_HISTORY_LIMIT = 1024; +constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 +constexpr int PAWN_CORRECTION_HISTORY_SIZE = 16384; // has to be a power of 2 +constexpr int MATERIAL_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 +constexpr int CORRECTION_HISTORY_LIMIT = 1024; static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0, "PAWN_HISTORY_SIZE has to be a power of 2"); -static_assert((CORRECTION_HISTORY_SIZE & (CORRECTION_HISTORY_SIZE - 1)) == 0, +static_assert((PAWN_CORRECTION_HISTORY_SIZE & (PAWN_CORRECTION_HISTORY_SIZE - 1)) == 0, "CORRECTION_HISTORY_SIZE has to be a power of 2"); enum PawnHistoryType { @@ -51,7 +52,11 @@ enum PawnHistoryType { template inline int pawn_structure_index(const Position& pos) { - return pos.pawn_key() & ((T == Normal ? PAWN_HISTORY_SIZE : CORRECTION_HISTORY_SIZE) - 1); + return pos.pawn_key() & ((T == Normal ? 
PAWN_HISTORY_SIZE : PAWN_CORRECTION_HISTORY_SIZE) - 1); +} + +inline int material_index(const Position& pos) { + return pos.material_key() & (MATERIAL_CORRECTION_HISTORY_SIZE - 1); } // StatsEntry stores the stat table value. It is usually a number but could @@ -133,9 +138,18 @@ using ContinuationHistory = Stats // PawnHistory is addressed by the pawn structure and a move's [piece][to] using PawnHistory = Stats; -// CorrectionHistory is addressed by color and pawn structure -using CorrectionHistory = - Stats; + +// Correction histories record differences between the static evaluation of +// positions and their search score. It is used to improve the static evaluation +// used by some search heuristics. + +// PawnCorrectionHistory is addressed by color and pawn structure +using PawnCorrectionHistory = + Stats; + +// MaterialCorrectionHistory is addressed by color and material configuration +using MaterialCorrectionHistory = + Stats; // The MovePicker class is used to pick one pseudo-legal move at a time from the // current position. The most important method is next_move(), which emits one diff --git a/src/search.cpp b/src/search.cpp index 9d950c0e..bc85a5c3 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -81,7 +81,10 @@ constexpr int futility_move_count(bool improving, Depth depth) { // Add correctionHistory value to raw staticEval and guarantee evaluation // does not hit the tablebase range. 
Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { - auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; + const auto pcv = + w.pawnCorrectionHistory[pos.side_to_move()][pawn_structure_index(pos)]; + const auto mcv = w.materialCorrectionHistory[pos.side_to_move()][material_index(pos)]; + const auto cv = (2 * pcv + mcv) / 3; v += 66 * cv / 512; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } @@ -487,7 +490,8 @@ void Search::Worker::clear() { mainHistory.fill(0); captureHistory.fill(-700); pawnHistory.fill(-1188); - correctionHistory.fill(0); + pawnCorrectionHistory.fill(0); + materialCorrectionHistory.fill(0); for (bool inCheck : {false, true}) for (StatsType c : {NoCaptures, Captures}) @@ -1390,7 +1394,8 @@ moves_loop: // When in check, search starts here { auto bonus = std::clamp(int(bestValue - ss->staticEval) * depth / 8, -CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4); - thisThread->correctionHistory[us][pawn_structure_index(pos)] << bonus; + thisThread->pawnCorrectionHistory[us][pawn_structure_index(pos)] << bonus; + thisThread->materialCorrectionHistory[us][material_index(pos)] << bonus; } assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); diff --git a/src/search.h b/src/search.h index 0f635186..c9fe9e18 100644 --- a/src/search.h +++ b/src/search.h @@ -277,11 +277,12 @@ class Worker { void ensure_network_replicated(); // Public because they need to be updatable by the stats - ButterflyHistory mainHistory; - CapturePieceToHistory captureHistory; - ContinuationHistory continuationHistory[2][2]; - PawnHistory pawnHistory; - CorrectionHistory correctionHistory; + ButterflyHistory mainHistory; + CapturePieceToHistory captureHistory; + ContinuationHistory continuationHistory[2][2]; + PawnHistory pawnHistory; + PawnCorrectionHistory pawnCorrectionHistory; + MaterialCorrectionHistory materialCorrectionHistory; private: void iterative_deepening(); 
From e74452ae44df35aeda21e81bb2eec883a7a45c38 Mon Sep 17 00:00:00 2001 From: Daniel Monroe Date: Sun, 1 Sep 2024 20:56:09 -0400 Subject: [PATCH 237/315] Reduce on ttcaptures if not capture Tweak ttcapture reduction. Reduce on ttcaptures only if the current move is not a capture Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 94912 W: 24896 L: 24492 D: 45524 Ptnml(0-2): 301, 11197, 24087, 11539, 332 https://tests.stockfishchess.org/tests/view/66cd2264bf8c9d8780fdab34 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 60738 W: 15465 L: 15096 D: 30177 Ptnml(0-2): 42, 6573, 16775, 6932, 47 https://tests.stockfishchess.org/tests/view/66cf356d9de3e7f9b33d0fde closes https://github.com/official-stockfish/Stockfish/pull/5562 Bench: 1268700 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index bc85a5c3..aab5c743 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1140,8 +1140,8 @@ moves_loop: // When in check, search starts here if (cutNode) r += 2 - (ttData.depth >= depth && ss->ttPv); - // Increase reduction if ttMove is a capture (~3 Elo) - if (ttCapture) + // Increase reduction if ttMove is a capture but the current move is not a capture (~3 Elo) + if (ttCapture && !capture) r++; // Increase reduction if next ply has a lot of fail high (~5 Elo) From 66a7965b0fab4d1ae59203039b0b2262dbf2bcc0 Mon Sep 17 00:00:00 2001 From: xu-shawn <50402888+xu-shawn@users.noreply.github.com> Date: Fri, 6 Sep 2024 14:31:40 -0700 Subject: [PATCH 238/315] Copy scripts directory in distributed packages closes https://github.com/official-stockfish/Stockfish/pull/5571 No functional change --- .github/workflows/upload_binaries.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/upload_binaries.yml b/.github/workflows/upload_binaries.yml index c5a2cd10..1067f6e7 100644 --- a/.github/workflows/upload_binaries.yml +++ b/.github/workflows/upload_binaries.yml @@ -59,6 +59,7 @@ jobs: mv "${{
matrix.config.simple_name }} ${{ matrix.binaries }}" stockfish-workflow cd stockfish-workflow cp -r src ../stockfish/ + cp -r scripts ../stockfish/ cp stockfish-$NAME-$BINARY$EXT ../stockfish/ cp "Top CPU Contributors.txt" ../stockfish/ cp Copying.txt ../stockfish/ From 1b310cc87e22840621284f27f1f5873c9b9c0384 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Mon, 2 Sep 2024 19:22:18 +0900 Subject: [PATCH 239/315] Export and clean up net downloading script Fixes https://github.com/official-stockfish/Stockfish/issues/5564 This patch extracts the net downloading script in Makefile into an external script file. Also the script is moderately rewritten for improved readability and speed. * Use wget preferentially over curl, as curl is known to have slight overhead. * Use command instead of hash to check if command exists. Reportedly, hash always returns zero in some POSIX shells even when the command fails. * Command existence checks (wget/curl, sha256sum) are performed only once at the beginning. * Each of common patterns is encapsulated in a function (get_nnue_filename, validate_network). * Print out error/warning messages to stderr. 
closes https://github.com/official-stockfish/Stockfish/pull/5563 No functional change Co-authored-by: Disservin --- .github/workflows/tests.yml | 12 +++--- scripts/net.sh | 75 +++++++++++++++++++++++++++++++++++++ src/Makefile | 52 +------------------------ 3 files changed, 82 insertions(+), 57 deletions(-) create mode 100755 scripts/net.sh diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8d209a4f..a826e6f0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -143,7 +143,7 @@ jobs: FROM ${{ matrix.config.base_image }} WORKDIR /app RUN apk update && apk add make g++ - CMD ["sh", "script.sh"] + CMD ["sh", "src/script.sh"] EOF - name: Download required macOS packages @@ -176,7 +176,7 @@ jobs: $COMPCXX -v else echo "$COMPCXX -v" > script.sh - docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder + docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder fi - name: Test help target @@ -342,8 +342,8 @@ jobs: - name: Test riscv64 build if: matrix.config.run_riscv64_tests run: | - echo "export LDFLAGS='-static' && make clean && make -j4 ARCH=riscv64 build" > script.sh - docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder + echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=riscv64 build" > script.sh + docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder ../tests/signature.sh $benchref # ppc64 tests @@ -351,8 +351,8 @@ jobs: - name: Test ppc64 build if: matrix.config.run_ppc64_tests run: | - echo "export LDFLAGS='-static' && make clean && make -j4 ARCH=ppc-64 build" > script.sh - docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder + echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=ppc-64 build" > script.sh + docker run --rm --platform ${{ 
matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder ../tests/signature.sh $benchref # Other tests diff --git a/scripts/net.sh b/scripts/net.sh new file mode 100755 index 00000000..168fbad6 --- /dev/null +++ b/scripts/net.sh @@ -0,0 +1,75 @@ +#!/bin/sh + +wget_or_curl=$( (command -v wget > /dev/null 2>&1 && echo "wget -q") || \ + (command -v curl > /dev/null 2>&1 && echo "curl -L -s -k")) + +if [ -z "$wget_or_curl" ]; then + >&2 printf "%s\n" "Neither wget or curl is installed." \ + "Install one of these tools to download NNUE files automatically." + exit 1 +fi + +sha256sum=$( (command -v shasum > /dev/null 2>&1 && echo "shasum -a 256") || \ + (command -v sha256sum > /dev/null 2>&1 && echo "sha256sum")) + +if [ -z "$sha256sum" ]; then + >&2 echo "sha256sum not found, NNUE files will be assumed valid." +fi + +get_nnue_filename() { + grep "$1" evaluate.h | grep "#define" | sed "s/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/" +} + +validate_network() { + # If no sha256sum command is available, assume the file is always valid. + if [ -n "$sha256sum" ] && [ -f "$1" ]; then + if [ "$1" != "nn-$($sha256sum "$1" | cut -c 1-12).nnue" ]; then + rm -f "$1" + return 1 + fi + fi +} + +fetch_network() { + _filename="$(get_nnue_filename "$1")" + + if [ -z "$_filename" ]; then + >&2 echo "NNUE file name not found for: $1" + return 1 + fi + + if [ -f "$_filename" ]; then + if validate_network "$_filename"; then + echo "Existing $_filename validated, skipping download" + return + else + echo "Removing invalid NNUE file: $_filename" + fi + fi + + for url in \ + "https://tests.stockfishchess.org/api/nn/$_filename" \ + "https://github.com/official-stockfish/networks/raw/master/$_filename"; do + echo "Downloading from $url ..." 
+ if $wget_or_curl "$url"; then + if validate_network "$_filename"; then + echo "Successfully validated $_filename" + else + echo "Downloaded $_filename is invalid" + continue + fi + else + echo "Failed to download from $url" + fi + if [ -f "$_filename" ]; then + return + fi + done + + # Download was not successful in the loop, return false. + >&2 echo "Failed to download $_filename" + return 1 +} + +fetch_network EvalFileDefaultNameBig && \ +fetch_network EvalFileDefaultNameSmall diff --git a/src/Makefile b/src/Makefile index 7142b972..042d9479 100644 --- a/src/Makefile +++ b/src/Makefile @@ -917,59 +917,9 @@ profileclean: @rm -f stockfish.res @rm -f ./-lstdc++.res -define fetch_network - @echo "Default net: $(nnuenet)" - @if [ "x$(curl_or_wget)" = "x" ]; then \ - echo "Neither curl nor wget is installed. Install one of these tools unless the net has been downloaded manually"; \ - fi - @if [ "x$(shasum_command)" = "x" ]; then \ - echo "shasum / sha256sum not found, skipping net validation"; \ - elif test -f "$(nnuenet)"; then \ - if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ - echo "Removing invalid network"; rm -f $(nnuenet); \ - fi; \ - fi; - @for nnuedownloadurl in "$(nnuedownloadurl1)" "$(nnuedownloadurl2)"; do \ - if test -f "$(nnuenet)"; then \ - echo "$(nnuenet) available : OK"; break; \ - else \ - if [ "x$(curl_or_wget)" != "x" ]; then \ - echo "Downloading $${nnuedownloadurl}"; $(curl_or_wget) $${nnuedownloadurl} > $(nnuenet);\ - else \ - echo "No net found and download not possible"; exit 1;\ - fi; \ - fi; \ - if [ "x$(shasum_command)" != "x" ]; then \ - if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ - echo "Removing failed download"; rm -f $(nnuenet); \ - fi; \ - fi; \ - done - @if ! 
test -f "$(nnuenet)"; then \ - echo "Failed to download $(nnuenet)."; \ - fi; - @if [ "x$(shasum_command)" != "x" ]; then \ - if [ "$(nnuenet)" = "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ - echo "Network validated"; break; \ - fi; \ - fi; -endef - -# set up shell variables for the net stuff -define netvariables -$(eval nnuenet := $(shell grep $(1) evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) -$(eval nnuedownloadurl1 := https://tests.stockfishchess.org/api/nn/$(nnuenet)) -$(eval nnuedownloadurl2 := https://github.com/official-stockfish/networks/raw/master/$(nnuenet)) -$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) -$(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi)) -endef - # evaluation network (nnue) net: - $(call netvariables, EvalFileDefaultNameBig) - $(call fetch_network) - $(call netvariables, EvalFileDefaultNameSmall) - $(call fetch_network) + @$(SHELL) ../scripts/net.sh format: $(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file From d7e3a708d456ff2793c2392c13d8d9cbea61aaba Mon Sep 17 00:00:00 2001 From: xu-shawn <50402888+xu-shawn@users.noreply.github.com> Date: Fri, 6 Sep 2024 14:20:40 -0700 Subject: [PATCH 240/315] Remove ARCH=... from README.md closes https://github.com/official-stockfish/Stockfish/pull/5570 No functional change --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 52b123cb..25da319d 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ descriptions. 
An example suitable for most Intel and AMD chips: ``` cd src -make -j profile-build ARCH=x86-64-avx2 +make -j profile-build ``` Detailed compilation instructions for all platforms can be found in our From a8cb002038bf314764a737077864a961c0e1d145 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sat, 7 Sep 2024 18:46:09 +0300 Subject: [PATCH 241/315] Simplify ttmove reduction Remove condition that clamps reductions for tt move. Passed STC: https://tests.stockfishchess.org/tests/view/66d5f1239de3e7f9b33d14b0 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 91136 W: 23805 L: 23646 D: 43685 Ptnml(0-2): 334, 10328, 24066, 10525, 315 Passed LTC: https://tests.stockfishchess.org/tests/view/66d7c5889de3e7f9b33d1721 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 139242 W: 35130 L: 35030 D: 69082 Ptnml(0-2): 78, 15200, 38986, 15258, 99 closes https://github.com/official-stockfish/Stockfish/pull/5574 Bench: 1268715 --- src/search.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index aab5c743..1ed849f2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1148,10 +1148,9 @@ moves_loop: // When in check, search starts here if ((ss + 1)->cutoffCnt > 3) r += 1 + allNode; - // For first picked move (ttMove) reduce reduction, but never allow - // reduction to go below 0 (~3 Elo) + // For first picked move (ttMove) reduce reduction (~3 Elo) else if (move == ttData.move) - r = std::max(0, r - 2); + r -= 2; ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] From effa2460710aef54465967796099916a5f0d13d3 Mon Sep 17 00:00:00 2001 From: Disservin Date: Sat, 7 Sep 2024 20:34:10 +0200 Subject: [PATCH 242/315] Use optional for the engine path - A small quality-of-life change is to change the type of engine path from a string to an optional string; this skips the binary directory lookup, which is commonly disabled by people who create wasm builds or include stockfish as a library.
closes https://github.com/official-stockfish/Stockfish/pull/5575 No functional change --- src/engine.cpp | 4 ++-- src/engine.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/engine.cpp b/src/engine.cpp index 81bb260b..b5cc3f83 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -47,8 +47,8 @@ namespace NN = Eval::NNUE; constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; -Engine::Engine(std::string path) : - binaryDirectory(CommandLine::get_binary_directory(path)), +Engine::Engine(std::optional path) : + binaryDirectory(path ? CommandLine::get_binary_directory(*path) : ""), numaContext(NumaConfig::from_system()), states(new std::deque(1)), threads(), diff --git a/src/engine.h b/src/engine.h index f3c78398..efab1c6a 100644 --- a/src/engine.h +++ b/src/engine.h @@ -47,7 +47,7 @@ class Engine { using InfoFull = Search::InfoFull; using InfoIter = Search::InfoIteration; - Engine(std::string path = ""); + Engine(std::optional path = std::nullopt); // Cannot be movable due to components holding backreferences to fields Engine(const Engine&) = delete; From 2680c9c7992f6565e9a2f0acc52260af55e56b5a Mon Sep 17 00:00:00 2001 From: MinetaS Date: Fri, 6 Sep 2024 22:14:47 +0900 Subject: [PATCH 243/315] Small speedup in incremental accumulator updates Instead of updating at most two accumulators, update all accumulators during incremental updates. Tests have shown that this change yields a small speedup of at least 0.5%, and up to 1% with shorter TC.
Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 54368 W: 14179 L: 13842 D: 26347 Ptnml(0-2): 173, 6122, 14262, 6449, 178 https://tests.stockfishchess.org/tests/view/66db038a9de3e7f9b33d1ad9 Passed 5+0.05: LLR: 2.98 (-2.94,2.94) <0.00,2.00> Total: 55040 W: 14682 L: 14322 D: 26036 Ptnml(0-2): 303, 6364, 13856, 6664, 333 https://tests.stockfishchess.org/tests/view/66dbc325dc53972b68218ba7 Passed non-regression LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 57390 W: 14555 L: 14376 D: 28459 Ptnml(0-2): 37, 5876, 16683, 6069, 30 https://tests.stockfishchess.org/tests/view/66dbc30adc53972b68218ba5 closes https://github.com/official-stockfish/Stockfish/pull/5576 No functional change --- src/nnue/nnue_feature_transformer.h | 330 ++++++++++++---------------- src/position.cpp | 2 + src/position.h | 1 + 3 files changed, 140 insertions(+), 193 deletions(-) diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 2f74dcae..fa180678 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -453,11 +453,10 @@ class FeatureTransformer { private: template - [[nodiscard]] std::pair - try_find_computed_accumulator(const Position& pos) const { + StateInfo* try_find_computed_accumulator(const Position& pos) const { // Look for a usable accumulator of an earlier position. We keep track // of the estimated gain in terms of features to be added/subtracted. - StateInfo *st = pos.state(), *next = nullptr; + StateInfo* st = pos.state(); int gain = FeatureSet::refresh_cost(pos); while (st->previous && !(st->*accPtr).computed[Perspective]) { @@ -466,30 +465,17 @@ class FeatureTransformer { if (FeatureSet::requires_refresh(st, Perspective) || (gain -= FeatureSet::update_cost(st) + 1) < 0) break; - next = st; - st = st->previous; + st = st->previous; } - return {st, next}; + return st; } - // NOTE: The parameter states_to_update is an array of position states. 
- // All states must be sequential, that is states_to_update[i] must - // either be reachable by repeatedly applying ->previous from - // states_to_update[i+1], and computed_st must be reachable by - // repeatedly applying ->previous on states_to_update[0]. - template - void update_accumulator_incremental(const Position& pos, - StateInfo* computed_st, - StateInfo* states_to_update[N]) const { - static_assert(N > 0); - assert([&]() { - for (size_t i = 0; i < N; ++i) - { - if (states_to_update[i] == nullptr) - return false; - } - return true; - }()); + // It computes the accumulator of the next position, or updates the + // current position's accumulator if CurrentOnly is true. + template + void update_accumulator_incremental(const Position& pos, StateInfo* computed) const { + assert((computed->*accPtr).computed[Perspective]); + assert(computed->next != nullptr); #ifdef VECTOR // Gcc-10.2 unnecessarily spills AVX2 registers if this array @@ -498,205 +484,186 @@ class FeatureTransformer { psqt_vec_t psqt[NumPsqtRegs]; #endif - // Update incrementally going back through states_to_update. - // Gather all features to be updated. const Square ksq = pos.square(Perspective); // The size must be enough to contain the largest possible update. // That might depend on the feature set and generally relies on the // feature set's update cost calculation to be correct and never allow // updates with more added/removed features than MaxActiveDimensions. - FeatureSet::IndexList removed[N], added[N]; + FeatureSet::IndexList removed, added; - for (int i = N - 1; i >= 0; --i) - { - (states_to_update[i]->*accPtr).computed[Perspective] = true; + if constexpr (CurrentOnly) + for (StateInfo* st = pos.state(); st != computed; st = st->previous) + FeatureSet::append_changed_indices(ksq, st->dirtyPiece, removed, + added); + else + FeatureSet::append_changed_indices(ksq, computed->next->dirtyPiece, + removed, added); - const StateInfo* end_state = i == 0 ? 
computed_st : states_to_update[i - 1]; + StateInfo* next = CurrentOnly ? pos.state() : computed->next; + assert(!(next->*accPtr).computed[Perspective]); - for (StateInfo* st2 = states_to_update[i]; st2 != end_state; st2 = st2->previous) - FeatureSet::append_changed_indices(ksq, st2->dirtyPiece, removed[i], - added[i]); - } - - StateInfo* st = computed_st; - - // Now update the accumulators listed in states_to_update[], - // where the last element is a sentinel. #ifdef VECTOR - - if (N == 1 && (removed[0].size() == 1 || removed[0].size() == 2) && added[0].size() == 1) + if ((removed.size() == 1 || removed.size() == 2) && added.size() == 1) { - assert(states_to_update[0]); - auto accIn = - reinterpret_cast(&(st->*accPtr).accumulation[Perspective][0]); - auto accOut = reinterpret_cast( - &(states_to_update[0]->*accPtr).accumulation[Perspective][0]); + reinterpret_cast(&(computed->*accPtr).accumulation[Perspective][0]); + auto accOut = reinterpret_cast(&(next->*accPtr).accumulation[Perspective][0]); - const IndexType offsetR0 = HalfDimensions * removed[0][0]; + const IndexType offsetR0 = HalfDimensions * removed[0]; auto columnR0 = reinterpret_cast(&weights[offsetR0]); - const IndexType offsetA = HalfDimensions * added[0][0]; + const IndexType offsetA = HalfDimensions * added[0]; auto columnA = reinterpret_cast(&weights[offsetA]); - if (removed[0].size() == 1) + if (removed.size() == 1) { - for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); - ++k) - accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]); + for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i) + accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA[i]); } else { - const IndexType offsetR1 = HalfDimensions * removed[0][1]; + const IndexType offsetR1 = HalfDimensions * removed[1]; auto columnR1 = reinterpret_cast(&weights[offsetR1]); - for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); - 
++k) - accOut[k] = vec_sub_16(vec_add_16(accIn[k], columnA[k]), - vec_add_16(columnR0[k], columnR1[k])); + for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i) + accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA[i]), + vec_add_16(columnR0[i], columnR1[i])); } - auto accPsqtIn = - reinterpret_cast(&(st->*accPtr).psqtAccumulation[Perspective][0]); - auto accPsqtOut = reinterpret_cast( - &(states_to_update[0]->*accPtr).psqtAccumulation[Perspective][0]); + auto accPsqtIn = reinterpret_cast( + &(computed->*accPtr).psqtAccumulation[Perspective][0]); + auto accPsqtOut = + reinterpret_cast(&(next->*accPtr).psqtAccumulation[Perspective][0]); - const IndexType offsetPsqtR0 = PSQTBuckets * removed[0][0]; + const IndexType offsetPsqtR0 = PSQTBuckets * removed[0]; auto columnPsqtR0 = reinterpret_cast(&psqtWeights[offsetPsqtR0]); - const IndexType offsetPsqtA = PSQTBuckets * added[0][0]; + const IndexType offsetPsqtA = PSQTBuckets * added[0]; auto columnPsqtA = reinterpret_cast(&psqtWeights[offsetPsqtA]); - if (removed[0].size() == 1) + if (removed.size() == 1) { - for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); - ++k) - accPsqtOut[k] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[k], columnPsqtR0[k]), - columnPsqtA[k]); + for (std::size_t i = 0; + i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i) + accPsqtOut[i] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]), + columnPsqtA[i]); } else { - const IndexType offsetPsqtR1 = PSQTBuckets * removed[0][1]; + const IndexType offsetPsqtR1 = PSQTBuckets * removed[1]; auto columnPsqtR1 = reinterpret_cast(&psqtWeights[offsetPsqtR1]); - for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); - ++k) - accPsqtOut[k] = - vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[k], columnPsqtA[k]), - vec_add_psqt_32(columnPsqtR0[k], columnPsqtR1[k])); + for (std::size_t i = 0; + i < PSQTBuckets * sizeof(PSQTWeightType) / 
sizeof(psqt_vec_t); ++i) + accPsqtOut[i] = + vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA[i]), + vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i])); } } else { - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + for (IndexType i = 0; i < HalfDimensions / TileHeight; ++i) { // Load accumulator auto accTileIn = reinterpret_cast( - &(st->*accPtr).accumulation[Perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_load(&accTileIn[k]); + &(computed->*accPtr).accumulation[Perspective][i * TileHeight]); + for (IndexType j = 0; j < NumRegs; ++j) + acc[j] = vec_load(&accTileIn[j]); - for (IndexType i = 0; i < N; ++i) + // Difference calculation for the deactivated features + for (const auto index : removed) { - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); - } - - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - - // Store accumulator - auto accTileOut = reinterpret_cast( - &(states_to_update[i]->*accPtr).accumulation[Perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - vec_store(&accTileOut[k], acc[k]); + const IndexType offset = HalfDimensions * index + i * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumRegs; ++j) + acc[j] = vec_sub_16(acc[j], column[j]); } + + // Difference calculation for the activated features + for (const auto index : added) + { + const IndexType offset = HalfDimensions * index + i * TileHeight; + auto column = 
reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumRegs; ++j) + acc[j] = vec_add_16(acc[j], column[j]); + } + + // Store accumulator + auto accTileOut = reinterpret_cast( + &(next->*accPtr).accumulation[Perspective][i * TileHeight]); + for (IndexType j = 0; j < NumRegs; ++j) + vec_store(&accTileOut[j], acc[j]); } - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + for (IndexType i = 0; i < PSQTBuckets / PsqtTileHeight; ++i) { // Load accumulator auto accTilePsqtIn = reinterpret_cast( - &(st->*accPtr).psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_load_psqt(&accTilePsqtIn[k]); + &(computed->*accPtr).psqtAccumulation[Perspective][i * PsqtTileHeight]); + for (std::size_t j = 0; j < NumPsqtRegs; ++j) + psqt[j] = vec_load_psqt(&accTilePsqtIn[j]); - for (IndexType i = 0; i < N; ++i) + // Difference calculation for the deactivated features + for (const auto index : removed) { - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); - } - - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - - // Store accumulator - auto accTilePsqtOut = reinterpret_cast( - &(states_to_update[i]->*accPtr) - .psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqtOut[k], psqt[k]); + const IndexType offset = PSQTBuckets * index + i * PsqtTileHeight; + auto columnPsqt = 
reinterpret_cast(&psqtWeights[offset]); + for (std::size_t j = 0; j < NumPsqtRegs; ++j) + psqt[j] = vec_sub_psqt_32(psqt[j], columnPsqt[j]); } + + // Difference calculation for the activated features + for (const auto index : added) + { + const IndexType offset = PSQTBuckets * index + i * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t j = 0; j < NumPsqtRegs; ++j) + psqt[j] = vec_add_psqt_32(psqt[j], columnPsqt[j]); + } + + // Store accumulator + auto accTilePsqtOut = reinterpret_cast( + &(next->*accPtr).psqtAccumulation[Perspective][i * PsqtTileHeight]); + for (std::size_t j = 0; j < NumPsqtRegs; ++j) + vec_store_psqt(&accTilePsqtOut[j], psqt[j]); } } #else - for (IndexType i = 0; i < N; ++i) + std::memcpy((next->*accPtr).accumulation[Perspective], + (computed->*accPtr).accumulation[Perspective], + HalfDimensions * sizeof(BiasType)); + std::memcpy((next->*accPtr).psqtAccumulation[Perspective], + (computed->*accPtr).psqtAccumulation[Perspective], + PSQTBuckets * sizeof(PSQTWeightType)); + + // Difference calculation for the deactivated features + for (const auto index : removed) { - std::memcpy((states_to_update[i]->*accPtr).accumulation[Perspective], - (st->*accPtr).accumulation[Perspective], HalfDimensions * sizeof(BiasType)); + const IndexType offset = HalfDimensions * index; + for (IndexType i = 0; i < HalfDimensions; ++i) + (next->*accPtr).accumulation[Perspective][i] -= weights[offset + i]; - for (std::size_t k = 0; k < PSQTBuckets; ++k) - (states_to_update[i]->*accPtr).psqtAccumulation[Perspective][k] = - (st->*accPtr).psqtAccumulation[Perspective][k]; + for (std::size_t i = 0; i < PSQTBuckets; ++i) + (next->*accPtr).psqtAccumulation[Perspective][i] -= + psqtWeights[index * PSQTBuckets + i]; + } - st = states_to_update[i]; + // Difference calculation for the activated features + for (const auto index : added) + { + const IndexType offset = HalfDimensions * index; + for (IndexType i = 0; i < HalfDimensions; ++i) 
+ (next->*accPtr).accumulation[Perspective][i] += weights[offset + i]; - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < HalfDimensions; ++j) - (st->*accPtr).accumulation[Perspective][j] -= weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - (st->*accPtr).psqtAccumulation[Perspective][k] -= - psqtWeights[index * PSQTBuckets + k]; - } - - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < HalfDimensions; ++j) - (st->*accPtr).accumulation[Perspective][j] += weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - (st->*accPtr).psqtAccumulation[Perspective][k] += - psqtWeights[index * PSQTBuckets + k]; - } + for (std::size_t i = 0; i < PSQTBuckets; ++i) + (next->*accPtr).psqtAccumulation[Perspective][i] += + psqtWeights[index * PSQTBuckets + i]; } #endif + + (next->*accPtr).computed[Perspective] = true; + + if (!CurrentOnly && next != pos.state()) + update_accumulator_incremental(pos, next); } template @@ -871,14 +838,10 @@ class FeatureTransformer { if ((pos.state()->*accPtr).computed[Perspective]) return; - auto [oldest_st, _] = try_find_computed_accumulator(pos); + StateInfo* oldest = try_find_computed_accumulator(pos); - if ((oldest_st->*accPtr).computed[Perspective]) - { - // Only update current position accumulator to minimize work - StateInfo* states_to_update[1] = {pos.state()}; - update_accumulator_incremental(pos, oldest_st, states_to_update); - } + if ((oldest->*accPtr).computed[Perspective] && oldest != pos.state()) + update_accumulator_incremental(pos, oldest); else update_accumulator_refresh_cache(pos, cache); } @@ -887,31 +850,12 @@ class FeatureTransformer { void update_accumulator(const Position& pos, AccumulatorCaches::Cache* cache) const { - auto [oldest_st, next] = 
try_find_computed_accumulator(pos); + StateInfo* oldest = try_find_computed_accumulator(pos); - if ((oldest_st->*accPtr).computed[Perspective]) - { - if (next == nullptr) - return; - - // Now update the accumulators listed in states_to_update[], where - // the last element is a sentinel. Currently we update two accumulators: - // 1. for the current position - // 2. the next accumulator after the computed one - // The heuristic may change in the future. - if (next == pos.state()) - { - StateInfo* states_to_update[1] = {next}; - - update_accumulator_incremental(pos, oldest_st, states_to_update); - } - else - { - StateInfo* states_to_update[2] = {next, pos.state()}; - - update_accumulator_incremental(pos, oldest_st, states_to_update); - } - } + if ((oldest->*accPtr).computed[Perspective] && oldest != pos.state()) + // Start from the oldest computed accumulator, update all the + // accumulators up to the current position. + update_accumulator_incremental(pos, oldest); else update_accumulator_refresh_cache(pos, cache); } diff --git a/src/position.cpp b/src/position.cpp index d374b1c0..df95ffef 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -671,6 +671,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // our state pointer to point to the new (ready to be updated) state. std::memcpy(&newSt, st, offsetof(StateInfo, key)); newSt.previous = st; + st->next = &newSt; st = &newSt; // Increment ply counters. 
In particular, rule50 will be reset to zero later on @@ -963,6 +964,7 @@ void Position::do_null_move(StateInfo& newSt, TranspositionTable& tt) { std::memcpy(&newSt, st, offsetof(StateInfo, accumulatorBig)); newSt.previous = st; + st->next = &newSt; st = &newSt; st->dirtyPiece.dirty_num = 0; diff --git a/src/position.h b/src/position.h index 064dd5fa..6cac1731 100644 --- a/src/position.h +++ b/src/position.h @@ -53,6 +53,7 @@ struct StateInfo { Key key; Bitboard checkersBB; StateInfo* previous; + StateInfo* next; Bitboard blockersForKing[COLOR_NB]; Bitboard pinners[COLOR_NB]; Bitboard checkSquares[PIECE_TYPE_NB]; From 6de25872361de9515bdb25bf1d0391311d074012 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Sat, 31 Aug 2024 16:35:17 +0900 Subject: [PATCH 244/315] Remove statScore condition in NMP Eliminate the condition that has a nearly 100% likelihood of being true. Passed non-regression STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 208832 W: 54053 L: 54022 D: 100757 Ptnml(0-2): 753, 24987, 52901, 25026, 749 https://tests.stockfishchess.org/tests/view/66cddb50bf8c9d8780fdabaf Passed non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 154344 W: 39132 L: 39047 D: 76165 Ptnml(0-2): 115, 17231, 42403, 17300, 123 https://tests.stockfishchess.org/tests/view/66cfafe39de3e7f9b33d1050 closes https://github.com/official-stockfish/Stockfish/pull/5558 Bench: 1393697 --- src/search.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 1ed849f2..d26f43db 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -773,10 +773,9 @@ Value Search::Worker::search( return beta + (eval - beta) / 3; // Step 9.
Null move search with verification search (~35 Elo) - if (cutNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 14389 - && eval >= beta && ss->staticEval >= beta - 21 * depth + 390 && !excludedMove - && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly - && beta > VALUE_TB_LOSS_IN_MAX_PLY) + if (cutNode && (ss - 1)->currentMove != Move::null() && eval >= beta + && ss->staticEval >= beta - 21 * depth + 390 && !excludedMove && pos.non_pawn_material(us) + && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); From d8e49cdbdd8076d85b137510ee5637e36db1074f Mon Sep 17 00:00:00 2001 From: Nonlinear2 <131959792+Nonlinear2@users.noreply.github.com> Date: Mon, 9 Sep 2024 22:32:00 +0200 Subject: [PATCH 245/315] Remove the `moveCount` increase in the LMR condition. Passed non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 87104 W: 22630 L: 22464 D: 42010 Ptnml(0-2): 316, 10295, 22132, 10525, 284 https://tests.stockfishchess.org/tests/view/66dccd00dc53972b68218c60 Passed non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 94050 W: 23869 L: 23722 D: 46459 Ptnml(0-2): 49, 10400, 25985, 10537, 54 https://tests.stockfishchess.org/tests/view/66dd69c7dc53972b68218ca5 closes https://github.com/official-stockfish/Stockfish/pull/5582 Bench: 1281840 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index d26f43db..ac0b9c6d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1159,7 +1159,7 @@ moves_loop: // When in check, search starts here r -= ss->statScore / 10898; // Step 17. 
Late moves reduction / extension (LMR, ~117 Elo) - if (depth >= 2 && moveCount > 1 + (rootNode && depth < 10)) + if (depth >= 2 && moveCount > 1) { // In general we want to cap the LMR depth search at newDepth, but when // reduction is negative, we allow this move a limited search extension From f677aee28baedcab4d3110d0a5c414621ed805c4 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Wed, 11 Sep 2024 05:14:01 +0900 Subject: [PATCH 246/315] Fix net downloading script The recent commit introduced a bug in the net downloading script that the file is not downloaded correctly and the content is redirected to stdout. closes https://github.com/official-stockfish/Stockfish/pull/5585 No functional change --- scripts/net.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/net.sh b/scripts/net.sh index 168fbad6..0bc57a19 100755 --- a/scripts/net.sh +++ b/scripts/net.sh @@ -1,7 +1,7 @@ #!/bin/sh -wget_or_curl=$( (command -v wget > /dev/null 2>&1 && echo "wget -q") || \ - (command -v curl > /dev/null 2>&1 && echo "curl -L -s -k")) +wget_or_curl=$( (command -v wget > /dev/null 2>&1 && echo "wget -qO-") || \ + (command -v curl > /dev/null 2>&1 && echo "curl -skL")) if [ -z "$wget_or_curl" ]; then >&2 printf "%s\n" "Neither wget or curl is installed." \ @@ -51,7 +51,7 @@ fetch_network() { "https://tests.stockfishchess.org/api/nn/$_filename" \ "https://github.com/official-stockfish/networks/raw/master/$_filename"; do echo "Downloading from $url ..." - if $wget_or_curl "$url"; then + if $wget_or_curl "$url" > "$_filename"; then if validate_network "$_filename"; then echo "Successfully validated $_filename" else From a06e7004c1a01fb56f5db90295884eaf3b7cd0f6 Mon Sep 17 00:00:00 2001 From: Disservin Date: Tue, 10 Sep 2024 18:36:54 +0200 Subject: [PATCH 247/315] Port instrumented testing to python Since an unknown amount of time the instrumented CI has been a bit flawed, explained here https://github.com/official-stockfish/Stockfish/issues/5185. 
It also experiences random timeout issues where restarting the workflow fixes it or very long run times (more than other workflows) and is not very portable. The intention of this commit is to port the instrumented.sh to python which also works on other operating systems. It should also be relatively easy for beginners to add new tests to assert stockfish's output and to run it. From the source directory the following command can be run. `python3 ../tests/instrumented.py --none ./stockfish` A test runner will go over the test suites and run the test cases. All instrumented tests should have been ported over. The required Python version for this should be 3.7 (untested), plus the requests package; testing.py includes some infrastructure code which sets up the testing. fixes https://github.com/official-stockfish/Stockfish/issues/5185 closes https://github.com/official-stockfish/Stockfish/pull/5583 No functional change --- .github/workflows/sanitizers.yml | 2 +- .gitignore | 5 + tests/instrumented.py | 520 +++++++++++++++++++++++++++++++ tests/instrumented.sh | 301 ------------------ tests/testing.py | 378 ++++++++++++++++++++++ 5 files changed, 904 insertions(+), 302 deletions(-) create mode 100644 tests/instrumented.py delete mode 100755 tests/instrumented.sh create mode 100644 tests/testing.py diff --git a/.github/workflows/sanitizers.yml b/.github/workflows/sanitizers.yml index 55459292..946a81ce 100644 --- a/.github/workflows/sanitizers.yml +++ b/.github/workflows/sanitizers.yml @@ -75,4 +75,4 @@ jobs: export CXXFLAGS="-O1 -fno-inline" make clean make -j4 ARCH=x86-64-sse41-popcnt ${{ matrix.sanitizers.make_option }} debug=yes optimize=no build > /dev/null - ../tests/instrumented.sh --${{ matrix.sanitizers.instrumented_option }} + python3 ../tests/instrumented.py --${{ matrix.sanitizers.instrumented_option }} ./stockfish diff --git a/.gitignore b/.gitignore index 8981efca..2fc80d48 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,8 @@ src/-lstdc++.res # Neural
network for the NNUE evaluation **/*.nnue +# Files generated by the instrumented tests +tsan.supp +__pycache__/ +tests/syzygy +tests/bench_tmp.epd \ No newline at end of file diff --git a/tests/instrumented.py b/tests/instrumented.py new file mode 100644 index 00000000..a3747d4e --- /dev/null +++ b/tests/instrumented.py @@ -0,0 +1,520 @@ +import argparse +import re +import sys +import subprocess +import pathlib +import os + +from testing import ( + EPD, + TSAN, + Stockfish as Engine, + MiniTestFramework, + OrderedClassMembers, + Valgrind, + Syzygy, +) + +PATH = pathlib.Path(__file__).parent.resolve() +CWD = os.getcwd() + + +def get_prefix(): + if args.valgrind: + return Valgrind.get_valgrind_command() + if args.valgrind_thread: + return Valgrind.get_valgrind_thread_command() + + return [] + + +def get_threads(): + if args.valgrind_thread or args.sanitizer_thread: + return 2 + return 1 + + +def get_path(): + return os.path.abspath(os.path.join(CWD, args.stockfish_path)) + + +def postfix_check(output): + if args.sanitizer_undefined: + for idx, line in enumerate(output): + if "runtime error:" in line: + # print next possible 50 lines + for i in range(50): + debug_idx = idx + i + if debug_idx < len(output): + print(output[debug_idx]) + return False + + if args.sanitizer_thread: + for idx, line in enumerate(output): + if "WARNING: ThreadSanitizer:" in line: + # print next possible 50 lines + for i in range(50): + debug_idx = idx + i + if debug_idx < len(output): + print(output[debug_idx]) + return False + + return True + + +def Stockfish(*args, **kwargs): + return Engine(get_prefix(), get_path(), *args, **kwargs) + + +class TestCLI(metaclass=OrderedClassMembers): + + def beforeAll(self): + pass + + def afterAll(self): + pass + + def beforeEach(self): + self.stockfish = None + + def afterEach(self): + assert postfix_check(self.stockfish.get_output()) == True + self.stockfish.clear_output() + + def test_eval(self): + self.stockfish = Stockfish("eval".split(" "), True) + 
assert self.stockfish.process.returncode == 0 + + def test_go_nodes_1000(self): + self.stockfish = Stockfish("go nodes 1000".split(" "), True) + assert self.stockfish.process.returncode == 0 + + def test_go_depth_10(self): + self.stockfish = Stockfish("go depth 10".split(" "), True) + assert self.stockfish.process.returncode == 0 + + def test_go_perft_4(self): + self.stockfish = Stockfish("go perft 4".split(" "), True) + assert self.stockfish.process.returncode == 0 + + def test_go_movetime_1000(self): + self.stockfish = Stockfish("go movetime 1000".split(" "), True) + assert self.stockfish.process.returncode == 0 + + def test_go_wtime_8000_btime_8000_winc_500_binc_500(self): + self.stockfish = Stockfish( + "go wtime 8000 btime 8000 winc 500 binc 500".split(" "), + True, + ) + assert self.stockfish.process.returncode == 0 + + def test_go_wtime_1000_btime_1000_winc_0_binc_0(self): + self.stockfish = Stockfish( + "go wtime 1000 btime 1000 winc 0 binc 0".split(" "), + True, + ) + assert self.stockfish.process.returncode == 0 + + def test_go_wtime_1000_btime_1000_winc_0_binc_0_movestogo_5(self): + self.stockfish = Stockfish( + "go wtime 1000 btime 1000 winc 0 binc 0 movestogo 5".split(" "), + True, + ) + assert self.stockfish.process.returncode == 0 + + def test_go_movetime_200(self): + self.stockfish = Stockfish("go movetime 200".split(" "), True) + assert self.stockfish.process.returncode == 0 + + def test_go_nodes_20000_searchmoves_e2e4_d2d4(self): + self.stockfish = Stockfish( + "go nodes 20000 searchmoves e2e4 d2d4".split(" "), True + ) + assert self.stockfish.process.returncode == 0 + + def test_bench_128_threads_8_default_depth(self): + self.stockfish = Stockfish( + f"bench 128 {get_threads()} 8 default depth".split(" "), + True, + ) + assert self.stockfish.process.returncode == 0 + + def test_bench_128_threads_3_bench_tmp_epd_depth(self): + self.stockfish = Stockfish( + f"bench 128 {get_threads()} 3 {os.path.join(PATH,'bench_tmp.epd')} depth".split( + " " + ), 
+ True, + ) + assert self.stockfish.process.returncode == 0 + + def test_d(self): + self.stockfish = Stockfish("d".split(" "), True) + assert self.stockfish.process.returncode == 0 + + def test_compiler(self): + self.stockfish = Stockfish("compiler".split(" "), True) + assert self.stockfish.process.returncode == 0 + + def test_license(self): + self.stockfish = Stockfish("license".split(" "), True) + assert self.stockfish.process.returncode == 0 + + def test_uci(self): + self.stockfish = Stockfish("uci".split(" "), True) + assert self.stockfish.process.returncode == 0 + + def test_export_net_verify_nnue(self): + current_path = os.path.abspath(os.getcwd()) + self.stockfish = Stockfish( + f"export_net {os.path.join(current_path , 'verify.nnue')}".split(" "), True + ) + assert self.stockfish.process.returncode == 0 + + # verify the generated net equals the base net + + def test_network_equals_base(self): + self.stockfish = Stockfish( + ["uci"], + True, + ) + + output = self.stockfish.process.stdout + + # find line + for line in output.split("\n"): + if "option name EvalFile type string default" in line: + network = line.split(" ")[-1] + break + + # find network file in src dir + network = os.path.join(PATH.parent.resolve(), "src", network) + + if not os.path.exists(network): + print( + f"Network file {network} not found, please download the network file over the make command." 
+ ) + assert False + + diff = subprocess.run(["diff", network, f"verify.nnue"]) + + assert diff.returncode == 0 + + +class TestInteractive(metaclass=OrderedClassMembers): + def beforeAll(self): + self.stockfish = Stockfish() + + def afterAll(self): + self.stockfish.quit() + assert self.stockfish.close() == 0 + + def afterEach(self): + assert postfix_check(self.stockfish.get_output()) == True + self.stockfish.clear_output() + + def test_startup_output(self): + self.stockfish.starts_with("Stockfish") + + def test_uci_command(self): + self.stockfish.send_command("uci") + self.stockfish.equals("uciok") + + def test_set_threads_option(self): + self.stockfish.send_command(f"setoption name Threads value {get_threads()}") + + def test_ucinewgame_and_startpos_nodes_1000(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command("position startpos") + self.stockfish.send_command("go nodes 1000") + self.stockfish.starts_with("bestmove") + + def test_ucinewgame_and_startpos_moves(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command("position startpos moves e2e4 e7e6") + self.stockfish.send_command("go nodes 1000") + self.stockfish.starts_with("bestmove") + + def test_fen_position_1(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command("position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1") + self.stockfish.send_command("go nodes 1000") + self.stockfish.starts_with("bestmove") + + def test_fen_position_2_flip(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command("position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1") + self.stockfish.send_command("flip") + self.stockfish.send_command("go nodes 1000") + self.stockfish.starts_with("bestmove") + + def test_depth_5_with_callback(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command("position startpos") + self.stockfish.send_command("go depth 5") + + def callback(output): + regex = r"info depth \d+ seldepth \d+ 
multipv \d+ score cp \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv" + if output.startswith("info depth") and not re.match(regex, output): + assert False + if output.startswith("bestmove"): + return True + return False + + self.stockfish.check_output(callback) + + def test_ucinewgame_and_go_depth_9(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command("setoption name UCI_ShowWDL value true") + self.stockfish.send_command("position startpos") + self.stockfish.send_command("go depth 9") + + depth = 1 + + def callback(output): + nonlocal depth + + regex = rf"info depth {depth} seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv" + + if output.startswith("info depth"): + if not re.match(regex, output): + assert False + depth += 1 + + if output.startswith("bestmove"): + assert depth == 10 + return True + + return False + + self.stockfish.check_output(callback) + + def test_clear_hash(self): + self.stockfish.send_command("setoption name Clear Hash") + + def test_fen_position_mate_1(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 5K2/8/2qk4/2nPp3/3r4/6B1/B7/3R4 w - e6" + ) + self.stockfish.send_command("go depth 18") + + self.stockfish.expect("* score mate 1 * pv d5e6") + self.stockfish.equals("bestmove d5e6") + + def test_fen_position_mate_minus_1(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 2brrb2/8/p7/Q7/1p1kpPp1/1P1pN1K1/3P4/8 b - -" + ) + self.stockfish.send_command("go depth 18") + self.stockfish.expect("* score mate -1 *") + self.stockfish.starts_with("bestmove") + + def test_fen_position_fixed_node(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 5K2/8/2P1P1Pk/6pP/3p2P1/1P6/3P4/8 w - - 0 1" + ) + self.stockfish.send_command("go nodes 500000") + self.stockfish.starts_with("bestmove") + + def 
test_fen_position_with_mate_go_depth(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -" + ) + self.stockfish.send_command("go depth 18 searchmoves c6d7") + self.stockfish.expect("* score mate 2 * pv c6d7 * f7f5") + + self.stockfish.starts_with("bestmove") + + def test_fen_position_with_mate_go_mate(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -" + ) + self.stockfish.send_command("go mate 2 searchmoves c6d7") + self.stockfish.expect("* score mate 2 * pv c6d7 *") + + self.stockfish.starts_with("bestmove") + + def test_fen_position_with_mate_go_nodes(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -" + ) + self.stockfish.send_command("go nodes 500000 searchmoves c6d7") + self.stockfish.expect("* score mate 2 * pv c6d7 * f7f5") + + self.stockfish.starts_with("bestmove") + + def test_fen_position_depth_27(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 1NR2B2/5p2/5p2/1p1kpp2/1P2rp2/2P1pB2/2P1P1K1/8 b - -" + ) + self.stockfish.send_command("go depth 27") + self.stockfish.contains("score mate -2") + + self.stockfish.starts_with("bestmove") + + def test_fen_position_with_mate_go_depth_and_promotion(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7 f2f1q" + ) + self.stockfish.send_command("go depth 18") + self.stockfish.expect("* score mate 1 * pv f7f5") + self.stockfish.starts_with("bestmove f7f5") + + def test_fen_position_with_mate_go_depth_and_searchmoves(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -" + ) + self.stockfish.send_command("go depth 18 searchmoves c6d7") + 
self.stockfish.expect("* score mate 2 * pv c6d7 * f7f5") + + self.stockfish.starts_with("bestmove c6d7") + + def test_fen_position_with_moves_with_mate_go_depth_and_searchmoves(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command( + "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7" + ) + self.stockfish.send_command("go depth 18 searchmoves e3e2") + self.stockfish.expect("* score mate -1 * pv e3e2 f7f5") + self.stockfish.starts_with("bestmove e3e2") + + def test_verify_nnue_network(self): + current_path = os.path.abspath(os.getcwd()) + Stockfish( + f"export_net {os.path.join(current_path , 'verify.nnue')}".split(" "), True + ) + + self.stockfish.send_command("setoption name EvalFile value verify.nnue") + self.stockfish.send_command("position startpos") + self.stockfish.send_command("go depth 5") + self.stockfish.starts_with("bestmove") + + def test_multipv_setting(self): + self.stockfish.send_command("setoption name MultiPV value 4") + self.stockfish.send_command("position startpos") + self.stockfish.send_command("go depth 5") + self.stockfish.starts_with("bestmove") + + def test_fen_position_with_skill_level(self): + self.stockfish.send_command("setoption name Skill Level value 10") + self.stockfish.send_command("position startpos") + self.stockfish.send_command("go depth 5") + self.stockfish.starts_with("bestmove") + + self.stockfish.send_command("setoption name Skill Level value 20") + + +class TestSyzygy(metaclass=OrderedClassMembers): + def beforeAll(self): + self.stockfish = Stockfish() + + def afterAll(self): + self.stockfish.quit() + assert self.stockfish.close() == 0 + + def afterEach(self): + assert postfix_check(self.stockfish.get_output()) == True + self.stockfish.clear_output() + + def test_syzygy_setup(self): + self.stockfish.starts_with("Stockfish") + self.stockfish.send_command("uci") + self.stockfish.send_command( + f"setoption name SyzygyPath value {os.path.join(PATH, 'syzygy')}" + ) + 
self.stockfish.expect( + "info string Found 35 WDL and 35 DTZ tablebase files (up to 4-man)." + ) + + def test_syzygy_bench(self): + self.stockfish.send_command("bench 128 1 8 default depth") + self.stockfish.expect("Nodes searched :*") + + def test_syzygy_position(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command("position fen 4k3/PP6/8/8/8/8/8/4K3 w - - 0 1") + self.stockfish.send_command("go depth 5") + + def check_output(output): + if "score cp 20000" in output or "score mate" in output: + return True + + self.stockfish.check_output(check_output) + self.stockfish.expect("bestmove *") + + def test_syzygy_position_2(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command("position fen 8/1P6/2B5/8/4K3/8/6k1/8 w - - 0 1") + self.stockfish.send_command("go depth 5") + + def check_output(output): + if "score cp 20000" in output or "score mate" in output: + return True + + self.stockfish.check_output(check_output) + self.stockfish.expect("bestmove *") + + def test_syzygy_position_3(self): + self.stockfish.send_command("ucinewgame") + self.stockfish.send_command("position fen 8/1P6/2B5/8/4K3/8/6k1/8 b - - 0 1") + self.stockfish.send_command("go depth 5") + + def check_output(output): + if "score cp -20000" in output or "score mate" in output: + return True + + self.stockfish.check_output(check_output) + self.stockfish.expect("bestmove *") + + +def parse_args(): + parser = argparse.ArgumentParser(description="Run Stockfish with testing options") + parser.add_argument("--valgrind", action="store_true", help="Run valgrind testing") + parser.add_argument( + "--valgrind-thread", action="store_true", help="Run valgrind-thread testing" + ) + parser.add_argument( + "--sanitizer-undefined", + action="store_true", + help="Run sanitizer-undefined testing", + ) + parser.add_argument( + "--sanitizer-thread", action="store_true", help="Run sanitizer-thread testing" + ) + + parser.add_argument( + "--none", action="store_true", 
help="Run without any testing options" + ) + parser.add_argument("stockfish_path", type=str, help="Path to Stockfish binary") + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + EPD.create_bench_epd() + TSAN.set_tsan_option() + Syzygy.download_syzygy() + + framework = MiniTestFramework() + + # Each test suite will be ran inside a temporary directory + framework.run([TestCLI, TestInteractive, TestSyzygy]) + + EPD.delete_bench_epd() + TSAN.unset_tsan_option() + + if framework.has_failed(): + sys.exit(1) + + sys.exit(0) diff --git a/tests/instrumented.sh b/tests/instrumented.sh deleted file mode 100755 index 5fc6ca9a..00000000 --- a/tests/instrumented.sh +++ /dev/null @@ -1,301 +0,0 @@ -#!/bin/bash -# check for errors under Valgrind or sanitizers. - -error() -{ - echo "instrumented testing failed on line $1" - exit 1 -} -trap 'error ${LINENO}' ERR - -# define suitable post and prefixes for testing options -case $1 in - --valgrind) - echo "valgrind testing started" - prefix='' - exeprefix='valgrind --error-exitcode=42 --errors-for-leak-kinds=all --leak-check=full' - postfix='' - threads="1" - ;; - --valgrind-thread) - echo "valgrind-thread testing started" - prefix='' - exeprefix='valgrind --fair-sched=try --error-exitcode=42' - postfix='' - threads="2" - ;; - --sanitizer-undefined) - echo "sanitizer-undefined testing started" - prefix='!' - exeprefix='' - postfix='2>&1 | grep -A50 "runtime error:"' - threads="1" - ;; - --sanitizer-thread) - echo "sanitizer-thread testing started" - prefix='!' 
- exeprefix='' - postfix='2>&1 | grep -A50 "WARNING: ThreadSanitizer:"' - threads="2" - -cat << EOF > tsan.supp -race:Stockfish::TTEntry::read -race:Stockfish::TTEntry::save - -race:Stockfish::TranspositionTable::probe -race:Stockfish::TranspositionTable::hashfull - -EOF - - export TSAN_OPTIONS="suppressions=./tsan.supp" - - ;; - *) - echo "unknown testing started" - prefix='' - exeprefix='' - postfix='' - threads="1" - ;; -esac - -cat << EOF > bench_tmp.epd -Rn6/1rbq1bk1/2p2n1p/2Bp1p2/3Pp1pP/1N2P1P1/2Q1NPB1/6K1 w - - 2 26 -rnbqkb1r/ppp1pp2/5n1p/3p2p1/P2PP3/5P2/1PP3PP/RNBQKBNR w KQkq - 0 3 -3qnrk1/4bp1p/1p2p1pP/p2bN3/1P1P1B2/P2BQ3/5PP1/4R1K1 w - - 9 28 -r4rk1/1b2ppbp/pq4pn/2pp1PB1/1p2P3/1P1P1NN1/1PP3PP/R2Q1RK1 w - - 0 13 -EOF - -# simple command line testing -for args in "eval" \ - "go nodes 1000" \ - "go depth 10" \ - "go perft 4" \ - "go movetime 1000" \ - "go wtime 8000 btime 8000 winc 500 binc 500" \ - "go wtime 1000 btime 1000 winc 0 binc 0" \ - "go wtime 1000 btime 1000 winc 0 binc 0" \ - "go wtime 1000 btime 1000 winc 0 binc 0 movestogo 5" \ - "go movetime 200" \ - "go nodes 20000 searchmoves e2e4 d2d4" \ - "bench 128 $threads 8 default depth" \ - "bench 128 $threads 3 bench_tmp.epd depth" \ - "export_net verify.nnue" \ - "d" \ - "compiler" \ - "license" \ - "uci" -do - - echo "$prefix $exeprefix ./stockfish $args $postfix" - eval "$prefix $exeprefix ./stockfish $args $postfix" - -done - -# verify the generated net equals the base net -network=`./stockfish uci | grep 'option name EvalFile type string default' | awk '{print $NF}'` -echo "Comparing $network to the written verify.nnue" -diff $network verify.nnue - -# more general testing, following an uci protocol exchange -cat << EOF > game.exp - set timeout 240 - # to correctly catch eof we need the following line - # expect_before timeout { exit 2 } eof { exit 3 } - expect_before timeout { exit 2 } - - spawn $exeprefix ./stockfish - expect "Stockfish" - - send "uci\n" - expect "uciok" - - # send "setoption 
name Debug Log File value debug.log\n" - send "setoption name Threads value $threads\n" - - send "ucinewgame\n" - send "position startpos\n" - send "go nodes 1000\n" - expect "bestmove" - - send "ucinewgame\n" - send "position startpos moves e2e4 e7e6\n" - send "go nodes 1000\n" - expect "bestmove" - - send "ucinewgame\n" - send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n" - send "go depth 10\n" - expect "bestmove" - - send "ucinewgame\n" - send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n" - send "flip\n" - send "go depth 10\n" - expect "bestmove" - - send "ucinewgame\n" - send "position startpos\n" - send "go depth 5\n" - expect -re {info depth \d+ seldepth \d+ multipv \d+ score cp \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - expect "bestmove" - - send "ucinewgame\n" - send "setoption name UCI_ShowWDL value true\n" - send "position startpos\n" - send "go depth 9\n" - expect -re {info depth 1 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - expect -re {info depth 2 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - expect -re {info depth 3 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - expect -re {info depth 4 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - expect -re {info depth 5 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - expect -re {info depth 6 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - expect -re {info depth 7 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - expect -re {info depth 8 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - 
expect -re {info depth 9 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} - expect "bestmove" - - send "setoption name Clear Hash\n" - - send "ucinewgame\n" - send "position fen 5K2/8/2qk4/2nPp3/3r4/6B1/B7/3R4 w - e6\n" - send "go depth 18\n" - expect "score mate 1" - expect "pv d5e6" - expect "bestmove d5e6" - - send "ucinewgame\n" - send "position fen 2brrb2/8/p7/Q7/1p1kpPp1/1P1pN1K1/3P4/8 b - -\n" - send "go depth 18\n" - expect "score mate -1" - expect "bestmove" - - send "ucinewgame\n" - send "position fen 7K/P1p1p1p1/2P1P1Pk/6pP/3p2P1/1P6/3P4/8 w - - 0 1\n" - send "go nodes 500000\n" - expect "bestmove" - - send "ucinewgame\n" - send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" - send "go depth 18 searchmoves c6d7\n" - expect "score mate 2 * pv c6d7 * f7f5" - expect "bestmove c6d7" - - send "ucinewgame\n" - send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" - send "go mate 2 searchmoves c6d7\n" - expect "score mate 2 * pv c6d7" - expect "bestmove c6d7" - - send "ucinewgame\n" - send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" - send "go nodes 500000 searchmoves c6d7\n" - expect "score mate 2 * pv c6d7 * f7f5" - expect "bestmove c6d7" - - send "ucinewgame\n" - send "position fen 1NR2B2/5p2/5p2/1p1kpp2/1P2rp2/2P1pB2/2P1P1K1/8 b - - \n" - send "go depth 27\n" - expect "score mate -2" - expect "pv d5e6 c8d8" - expect "bestmove d5e6" - - send "ucinewgame\n" - send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7 f2f1q\n" - send "go depth 18\n" - expect "score mate 1 * pv f7f5" - expect "bestmove f7f5" - - send "ucinewgame\n" - send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" - send "go depth 18 searchmoves c6d7\n" - expect "score mate 2 * pv c6d7 * f7f5" - expect "bestmove c6d7" - - send "ucinewgame\n" - send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7\n" - send "go depth 18 searchmoves e3e2\n" - expect "score mate -1 * pv 
e3e2 f7f5" - expect "bestmove e3e2" - - send "setoption name EvalFile value verify.nnue\n" - send "position startpos\n" - send "go depth 5\n" - expect "bestmove" - - send "setoption name MultiPV value 4\n" - send "position startpos\n" - send "go depth 5\n" - expect "bestmove" - - send "setoption name Skill Level value 10\n" - send "position startpos\n" - send "go depth 5\n" - expect "bestmove" - send "setoption name Skill Level value 20\n" - - send "quit\n" - expect eof - - # return error code of the spawned program, useful for Valgrind - lassign [wait] pid spawnid os_error_flag value - exit \$value -EOF - -#download TB as needed -if [ ! -d ../tests/syzygy ]; then - curl -sL https://api.github.com/repos/niklasf/python-chess/tarball/9b9aa13f9f36d08aadfabff872882f4ab1494e95 | tar -xzf - - mv niklasf-python-chess-9b9aa13 ../tests/syzygy -fi - -cat << EOF > syzygy.exp - set timeout 240 - # to correctly catch eof we need the following line - # expect_before timeout { exit 2 } eof { exit 3 } - expect_before timeout { exit 2 } - spawn $exeprefix ./stockfish - expect "Stockfish" - send "uci\n" - send "setoption name SyzygyPath value ../tests/syzygy/\n" - expect "info string Found 35 WDL and 35 DTZ tablebase files (up to 4-man)." 
- send "bench 128 1 8 default depth\n" - expect "Nodes searched :" - send "ucinewgame\n" - send "position fen 4k3/PP6/8/8/8/8/8/4K3 w - - 0 1\n" - send "go depth 5\n" - expect -re {score cp 20000|score mate} - expect "bestmove" - send "ucinewgame\n" - send "position fen 8/1P6/2B5/8/4K3/8/6k1/8 w - - 0 1\n" - send "go depth 5\n" - expect -re {score cp 20000|score mate} - expect "bestmove" - send "ucinewgame\n" - send "position fen 8/1P6/2B5/8/4K3/8/6k1/8 b - - 0 1\n" - send "go depth 5\n" - expect -re {score cp -20000|score mate} - expect "bestmove" - send "quit\n" - expect eof - - # return error code of the spawned program, useful for Valgrind - lassign [wait] pid spawnid os_error_flag value - exit \$value -EOF - -for exp in game.exp syzygy.exp -do - - echo "======== $exp ==============" - cat $exp - echo "============================" - echo "$prefix expect $exp $postfix" - eval "$prefix expect $exp $postfix" - - rm $exp - -done - -rm -f tsan.supp bench_tmp.epd - -echo "instrumented testing OK" diff --git a/tests/testing.py b/tests/testing.py new file mode 100644 index 00000000..d51ca89a --- /dev/null +++ b/tests/testing.py @@ -0,0 +1,378 @@ +import subprocess +from typing import List +import os +import collections +import time +import sys +import traceback +import fnmatch +from functools import wraps +from contextlib import redirect_stdout +import io +import tarfile +import pathlib +import concurrent.futures +import tempfile +import shutil +import requests + +CYAN_COLOR = "\033[36m" +GRAY_COLOR = "\033[2m" +RED_COLOR = "\033[31m" +GREEN_COLOR = "\033[32m" +RESET_COLOR = "\033[0m" +WHITE_BOLD = "\033[1m" + +MAX_TIMEOUT = 60 * 5 + +PATH = pathlib.Path(__file__).parent.resolve() + + +class Valgrind: + @staticmethod + def get_valgrind_command(): + return [ + "valgrind", + "--error-exitcode=42", + "--errors-for-leak-kinds=all", + "--leak-check=full", + ] + + @staticmethod + def get_valgrind_thread_command(): + return ["valgrind", "--error-exitcode=42", 
"--fair-sched=try"] + + +class TSAN: + @staticmethod + def set_tsan_option(): + with open(f"tsan.supp", "w") as f: + f.write( + """ +race:Stockfish::TTEntry::read +race:Stockfish::TTEntry::save +race:Stockfish::TranspositionTable::probe +race:Stockfish::TranspositionTable::hashfull +""" + ) + + os.environ["TSAN_OPTIONS"] = "suppressions=./tsan.supp" + + @staticmethod + def unset_tsan_option(): + os.environ.pop("TSAN_OPTIONS", None) + os.remove(f"tsan.supp") + + +class EPD: + @staticmethod + def create_bench_epd(): + with open(f"{os.path.join(PATH,'bench_tmp.epd')}", "w") as f: + f.write( + """ +Rn6/1rbq1bk1/2p2n1p/2Bp1p2/3Pp1pP/1N2P1P1/2Q1NPB1/6K1 w - - 2 26 +rnbqkb1r/ppp1pp2/5n1p/3p2p1/P2PP3/5P2/1PP3PP/RNBQKBNR w KQkq - 0 3 +3qnrk1/4bp1p/1p2p1pP/p2bN3/1P1P1B2/P2BQ3/5PP1/4R1K1 w - - 9 28 +r4rk1/1b2ppbp/pq4pn/2pp1PB1/1p2P3/1P1P1NN1/1PP3PP/R2Q1RK1 w - - 0 13 +""" + ) + + @staticmethod + def delete_bench_epd(): + os.remove(f"{os.path.join(PATH,'bench_tmp.epd')}") + + +class Syzygy: + @staticmethod + def get_syzygy_path(): + return os.path.abspath("syzygy") + + @staticmethod + def download_syzygy(): + if not os.path.isdir(os.path.join(PATH, "syzygy")): + url = "https://api.github.com/repos/niklasf/python-chess/tarball/9b9aa13f9f36d08aadfabff872882f4ab1494e95" + file = "niklasf-python-chess-9b9aa13" + + with tempfile.TemporaryDirectory() as tmpdirname: + tarball_path = os.path.join(tmpdirname, f"{file}.tar.gz") + + response = requests.get(url, stream=True) + with open(tarball_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + + with tarfile.open(tarball_path, "r:gz") as tar: + tar.extractall(tmpdirname) + + shutil.move(os.path.join(tmpdirname, file), os.path.join(PATH, "syzygy")) + +class OrderedClassMembers(type): + @classmethod + def __prepare__(self, name, bases): + return collections.OrderedDict() + + def __new__(self, name, bases, classdict): + classdict["__ordered__"] = [ + key for key in classdict.keys() if key not in 
("__module__", "__qualname__") + ] + return type.__new__(self, name, bases, classdict) + + +class TimeoutException(Exception): + def __init__(self, message: str, timeout: int): + self.message = message + self.timeout = timeout + + +def timeout_decorator(timeout: float): + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + with concurrent.futures.ThreadPoolExecutor() as executor: + future = executor.submit(func, *args, **kwargs) + try: + result = future.result(timeout=timeout) + except concurrent.futures.TimeoutError: + raise TimeoutException( + f"Function {func.__name__} timed out after {timeout} seconds", + timeout, + ) + return result + + return wrapper + + return decorator + + +class MiniTestFramework: + def __init__(self): + self.passed_test_suites = 0 + self.failed_test_suites = 0 + self.passed_tests = 0 + self.failed_tests = 0 + + def has_failed(self) -> bool: + return self.failed_test_suites > 0 + + def run(self, classes: List[type]) -> bool: + self.start_time = time.time() + + for test_class in classes: + with tempfile.TemporaryDirectory() as tmpdirname: + original_cwd = os.getcwd() + os.chdir(tmpdirname) + + try: + if self.__run(test_class): + self.failed_test_suites += 1 + else: + self.passed_test_suites += 1 + finally: + os.chdir(original_cwd) + + self.__print_summary(round(time.time() - self.start_time, 2)) + return self.has_failed() + + def __run(self, test_class) -> bool: + test_instance = test_class() + test_name = test_instance.__class__.__name__ + test_methods = [m for m in test_instance.__ordered__ if m.startswith("test_")] + + print(f"\nTest Suite: {test_name}") + + if hasattr(test_instance, "beforeAll"): + test_instance.beforeAll() + + fails = 0 + + for method in test_methods: + fails += self.__run_test_method(test_instance, method) + + if hasattr(test_instance, "afterAll"): + test_instance.afterAll() + + self.failed_tests += fails + + return fails > 0 + + def __run_test_method(self, test_instance, method: str) -> int: + 
print(f" Running {method}... \r", end="", flush=True) + + buffer = io.StringIO() + fails = 0 + + try: + t0 = time.time() + + with redirect_stdout(buffer): + if hasattr(test_instance, "beforeEach"): + test_instance.beforeEach() + + getattr(test_instance, method)() + + if hasattr(test_instance, "afterEach"): + test_instance.afterEach() + + duration = time.time() - t0 + + self.print_success(f" {method} ({duration * 1000:.2f}ms)") + self.passed_tests += 1 + except Exception as e: + if isinstance(e, TimeoutException): + self.print_failure( + f" {method} (hit execution limit of {e.timeout} seconds)" + ) + + if isinstance(e, AssertionError): + self.__handle_assertion_error(t0, method) + + fails += 1 + finally: + self.__print_buffer_output(buffer) + + return fails + + def __handle_assertion_error(self, start_time, method: str): + duration = time.time() - start_time + self.print_failure(f" {method} ({duration * 1000:.2f}ms)") + traceback_output = "".join(traceback.format_tb(sys.exc_info()[2])) + + colored_traceback = "\n".join( + f" {CYAN_COLOR}{line}{RESET_COLOR}" + for line in traceback_output.splitlines() + ) + + print(colored_traceback) + + def __print_buffer_output(self, buffer: io.StringIO): + output = buffer.getvalue() + if output: + indented_output = "\n".join(f" {line}" for line in output.splitlines()) + print(f" {RED_COLOR}⎯⎯⎯⎯⎯OUTPUT⎯⎯⎯⎯⎯{RESET_COLOR}") + print(f"{GRAY_COLOR}{indented_output}{RESET_COLOR}") + print(f" {RED_COLOR}⎯⎯⎯⎯⎯OUTPUT⎯⎯⎯⎯⎯{RESET_COLOR}") + + def __print_summary(self, duration: float): + print(f"\n{WHITE_BOLD}Test Summary{RESET_COLOR}\n") + print( + f" Test Suites: {GREEN_COLOR}{self.passed_test_suites} passed{RESET_COLOR}, {RED_COLOR}{self.failed_test_suites} failed{RESET_COLOR}, {self.passed_test_suites + self.failed_test_suites} total" + ) + print( + f" Tests: {GREEN_COLOR}{self.passed_tests} passed{RESET_COLOR}, {RED_COLOR}{self.failed_tests} failed{RESET_COLOR}, {self.passed_tests + self.failed_tests} total" + ) + print(f" Time: 
{duration}s\n") + + def print_failure(self, add: str): + print(f" {RED_COLOR}✗{RESET_COLOR}{add}", flush=True) + + def print_success(self, add: str): + print(f" {GREEN_COLOR}✓{RESET_COLOR}{add}", flush=True) + + +class Stockfish: + def __init__( + self, + prefix: List[str], + path: str, + args: List[str] = [], + cli: bool = False, + ): + self.path = path + self.process = None + self.args = args + self.cli = cli + self.prefix = prefix + self.output = [] + + self.start() + + def start(self): + if self.cli: + self.process = subprocess.run( + self.prefix + [self.path] + self.args, + capture_output=True, + text=True, + ) + + self.process.stdout + + return + + self.process = subprocess.Popen( + self.prefix + [self.path] + self.args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + bufsize=1, + ) + + def setoption(self, name: str, value: str): + self.send_command(f"setoption name {name} value {value}") + + def send_command(self, command: str): + if not self.process: + raise RuntimeError("Stockfish process is not started") + + self.process.stdin.write(command + "\n") + self.process.stdin.flush() + + @timeout_decorator(MAX_TIMEOUT) + def equals(self, expected_output: str): + for line in self.readline(): + if line == expected_output: + return + + @timeout_decorator(MAX_TIMEOUT) + def expect(self, expected_output: str): + for line in self.readline(): + if fnmatch.fnmatch(line, expected_output): + return + + @timeout_decorator(MAX_TIMEOUT) + def contains(self, expected_output: str): + for line in self.readline(): + if expected_output in line: + return + + @timeout_decorator(MAX_TIMEOUT) + def starts_with(self, expected_output: str): + for line in self.readline(): + if line.startswith(expected_output): + return + + @timeout_decorator(MAX_TIMEOUT) + def check_output(self, callback): + if not callback: + raise ValueError("Callback function is required") + + for line in self.readline(): + if callback(line) == True: + 
return + + def readline(self): + if not self.process: + raise RuntimeError("Stockfish process is not started") + + while True: + line = self.process.stdout.readline().strip() + self.output.append(line) + + yield line + + def clear_output(self): + self.output = [] + + def get_output(self) -> List[str]: + return self.output + + def quit(self): + self.send_command("quit") + + def close(self): + if self.process: + self.process.stdin.close() + self.process.stdout.close() + return self.process.wait() + + return 0 From 224c147bd6211d2481afd25605b07c3fc98d837c Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Tue, 17 Sep 2024 20:44:57 +0200 Subject: [PATCH 248/315] VVLTC Search Tune Tuned with 115k games at VVLTC: https://tests.stockfishchess.org/tests/view/66c80e09bf8c9d8780fda62a Passed VVLTC 1st sprt: https://tests.stockfishchess.org/tests/view/66d69ade9de3e7f9b33d14f9 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 54270 W: 13935 L: 13647 D: 26688 Ptnml(0-2): 2, 4907, 17032, 5189, 5 Passed VVLTC 2nd sprt: https://tests.stockfishchess.org/tests/view/66dcf9c1dc53972b68218c84 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 136696 W: 34941 L: 34462 D: 67293 Ptnml(0-2): 8, 12659, 42535, 13138, 8 closes https://github.com/official-stockfish/Stockfish/pull/5592 Bench: 1644273 --- src/search.cpp | 84 +++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index ac0b9c6d..4f6e7511 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -67,7 +67,7 @@ namespace { // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { - Value futilityMult = 122 - 37 * noTtCutNode; + Value futilityMult = 118 - 33 * noTtCutNode; Value improvingDeduction = improving * futilityMult * 2; Value worseningDeduction = oppWorsening * futilityMult / 3; @@ -85,15 +85,15 @@ Value to_corrected_static_eval(Value v, const Worker& w, const 
Position& pos) { w.pawnCorrectionHistory[pos.side_to_move()][pawn_structure_index(pos)]; const auto mcv = w.materialCorrectionHistory[pos.side_to_move()][material_index(pos)]; const auto cv = (2 * pcv + mcv) / 3; - v += 66 * cv / 512; + v += 74 * cv / 512; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::min(190 * d - 108, 1596); } +int stat_bonus(Depth d) { return std::min(179 * d - 108, 1598); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return std::min(736 * d - 268, 2044); } +int stat_malus(Depth d) { return std::min(820 * d - 261, 2246); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } @@ -299,12 +299,12 @@ void Search::Worker::iterative_deepening() { // Reset aspiration window starting size Value avg = rootMoves[pvIdx].averageScore; - delta = 5 + avg * avg / 13424; + delta = 5 + avg * avg / 11797; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore (~4 Elo) - optimism[us] = 125 * avg / (std::abs(avg) + 89); + optimism[us] = 132 * avg / (std::abs(avg) + 89); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail @@ -488,8 +488,8 @@ void Search::Worker::iterative_deepening() { // Reset histories, usually before a new game void Search::Worker::clear() { mainHistory.fill(0); - captureHistory.fill(-700); - pawnHistory.fill(-1188); + captureHistory.fill(-753); + pawnHistory.fill(-1152); pawnCorrectionHistory.fill(0); materialCorrectionHistory.fill(0); @@ -497,10 +497,10 @@ void Search::Worker::clear() { for (StatsType c : {NoCaptures, Captures}) for (auto& to : continuationHistory[inCheck][c]) for (auto& h : to) - h->fill(-658); + h->fill(-678); for (size_t i = 1; i < 
reductions.size(); ++i) - reductions[i] = int((18.62 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + reductions[i] = int((18.43 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); refreshTable.clear(networks[numaAccessToken]); } @@ -737,7 +737,7 @@ Value Search::Worker::search( // Use static evaluation difference to improve quiet move ordering (~9 Elo) if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-10 * int((ss - 1)->staticEval + ss->staticEval), -1664, 1471) + 752; + int bonus = std::clamp(-10 * int((ss - 1)->staticEval + ss->staticEval), -1641, 1423) + 760; thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] @@ -755,7 +755,7 @@ Value Search::Worker::search( // Step 7. Razoring (~1 Elo) // If eval is really low, check with qsearch if we can exceed alpha. If the // search suggests we cannot exceed alpha, return a speculative fail low. - if (eval < alpha - 494 - 290 * depth * depth) + if (eval < alpha - 501 - 272 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) @@ -766,7 +766,7 @@ Value Search::Worker::search( // The depth condition is important for mate finding. if (!ss->ttPv && depth < 13 && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - - (ss - 1)->statScore / 260 + - (ss - 1)->statScore / 272 >= beta && eval >= beta && (!ttData.move || ttCapture) && beta > VALUE_TB_LOSS_IN_MAX_PLY && eval < VALUE_TB_WIN_IN_MAX_PLY) @@ -774,13 +774,13 @@ Value Search::Worker::search( // Step 9. 
Null move search with verification search (~35 Elo) if (cutNode && (ss - 1)->currentMove != Move::null() && eval >= beta - && ss->staticEval >= beta - 21 * depth + 390 && !excludedMove && pos.non_pawn_material(us) + && ss->staticEval >= beta - 23 * depth + 400 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / 202, 6) + depth / 3 + 5; + Depth R = std::min(int(eval - beta) / 209, 6) + depth / 3 + 5; ss->currentMove = Move::null(); ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -829,7 +829,7 @@ Value Search::Worker::search( // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search // returns a value much above beta, we can (almost) safely prune the previous move. - probCutBeta = beta + 184 - 53 * improving; + probCutBeta = beta + 189 - 53 * improving; if (!PvNode && depth > 3 && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY // If value from transposition table is lower than probCutBeta, don't attempt @@ -898,7 +898,7 @@ Value Search::Worker::search( moves_loop: // When in check, search starts here // Step 12. 
A small Probcut idea (~4 Elo) - probCutBeta = beta + 390; + probCutBeta = beta + 379; if ((ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 && ttData.value >= probCutBeta && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY) @@ -982,15 +982,15 @@ moves_loop: // When in check, search starts here // Futility pruning for captures (~2 Elo) if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { - Value futilityValue = ss->staticEval + 285 + 251 * lmrDepth + Value futilityValue = ss->staticEval + 300 + 238 * lmrDepth + PieceValue[capturedPiece] + captHist / 7; if (futilityValue <= alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - int seeHist = std::clamp(captHist / 32, -182 * depth, 166 * depth); - if (!pos.see_ge(move, -168 * depth - seeHist)) + int seeHist = std::clamp(captHist / 32, -159 * depth, 160 * depth); + if (!pos.see_ge(move, -167 * depth - seeHist)) continue; } else @@ -1001,15 +1001,15 @@ moves_loop: // When in check, search starts here + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) - if (history < -4165 * depth) + if (history < -4071 * depth) continue; history += 2 * thisThread->mainHistory[us][move.from_to()]; - lmrDepth += history / 3853; + lmrDepth += history / 3653; Value futilityValue = - ss->staticEval + (bestValue < ss->staticEval - 51 ? 143 : 52) + 135 * lmrDepth; + ss->staticEval + (bestValue < ss->staticEval - 51 ? 
145 : 49) + 144 * lmrDepth; // Futility pruning: parent node (~13 Elo) if (!ss->inCheck && lmrDepth < 12 && futilityValue <= alpha) @@ -1050,7 +1050,7 @@ moves_loop: // When in check, search starts here && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY && (ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 3) { - Value singularBeta = ttData.value - (54 + 76 * (ss->ttPv && !PvNode)) * depth / 64; + Value singularBeta = ttData.value - (54 + 77 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1060,13 +1060,13 @@ moves_loop: // When in check, search starts here if (value < singularBeta) { - int doubleMargin = 293 * PvNode - 195 * !ttCapture; - int tripleMargin = 107 + 259 * PvNode - 260 * !ttCapture + 98 * ss->ttPv; + int doubleMargin = 262 * PvNode - 204 * !ttCapture; + int tripleMargin = 97 + 266 * PvNode - 255 * !ttCapture + 94 * ss->ttPv; extension = 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin); - depth += ((!PvNode) && (depth < 16)); + depth += ((!PvNode) && (depth < 14)); } // Multi-cut pruning @@ -1099,7 +1099,7 @@ moves_loop: // When in check, search starts here else if (PvNode && move.to_sq() == prevSq && thisThread->captureHistory[movedPiece][move.to_sq()] [type_of(pos.piece_on(move.to_sq()))] - > 3994) + > 4299) extension = 1; } @@ -1153,10 +1153,10 @@ moves_loop: // When in check, search starts here ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - 4664; + + (*contHist[1])[movedPiece][move.to_sq()] - 4410; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / 10898; + r -= ss->statScore / 11016; // Step 17. 
Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1) @@ -1175,7 +1175,7 @@ moves_loop: // When in check, search starts here { // Adjust full-depth search based on LMR results - if the result was // good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = value > (bestValue + 35 + 2 * newDepth); // (~1 Elo) + const bool doDeeperSearch = value > (bestValue + 38 + 2 * newDepth); // (~1 Elo) const bool doShallowerSearch = value < bestValue + 8; // (~2 Elo) newDepth += doDeeperSearch - doShallowerSearch; @@ -1344,19 +1344,19 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (122 * (depth > 5) + 39 * !allNode + 165 * ((ss - 1)->moveCount > 8) - + 107 * (!ss->inCheck && bestValue <= ss->staticEval - 98) - + 134 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 91)); + int bonus = (118 * (depth > 5) + 38 * !allNode + 169 * ((ss - 1)->moveCount > 8) + + 116 * (!ss->inCheck && bestValue <= ss->staticEval - 101) + + 133 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 92)); // Proportional to "how much damage we have to undo" - bonus += std::min(-(ss - 1)->statScore / 100, 304); + bonus += std::min(-(ss - 1)->statScore / 102, 305); bonus = std::max(bonus, 0); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, - stat_bonus(depth) * bonus / 116); + stat_bonus(depth) * bonus / 107); thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] - << stat_bonus(depth) * bonus / 180; + << stat_bonus(depth) * bonus / 174; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) @@ -1522,7 +1522,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) if (bestValue > alpha) alpha = bestValue; - futilityBase = ss->staticEval + 299; + futilityBase = ss->staticEval + 280; } const PieceToHistory* 
contHist[] = {(ss - 1)->continuationHistory, @@ -1593,11 +1593,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)] [move.to_sq()] - <= 4643) + <= 5036) continue; // Do not search moves with bad enough SEE values (~5 Elo) - if (!pos.see_ge(move, -83)) + if (!pos.see_ge(move, -82)) continue; } @@ -1663,7 +1663,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) const { int reductionScale = reductions[d] * reductions[mn]; - return (reductionScale + 1274 - delta * 746 / rootDelta) / 1024 + (!i && reductionScale > 1293); + return (reductionScale + 1239 - delta * 795 / rootDelta) / 1024 + (!i && reductionScale > 1341); } // elapsed() returns the time elapsed since the search started. If the @@ -1794,7 +1794,7 @@ void update_all_stats(const Position& pos, // at ply -1, -2, -3, -4, and -6 with current move. 
void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - bonus = bonus * 52 / 64; + bonus = bonus * 53 / 64; for (int i : {1, 2, 3, 4, 6}) { From 5ce7f866a57264c38cf308152208deadc65508c8 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sat, 7 Sep 2024 15:04:28 -0700 Subject: [PATCH 249/315] Simplify Fail Low Bonus Passed Non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 302528 W: 78190 L: 78264 D: 146074 Ptnml(0-2): 1029, 35797, 77551, 35993, 894 https://tests.stockfishchess.org/tests/view/66dcebdedc53972b68218c7e Passed Non-regression LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 122754 W: 31025 L: 30907 D: 60822 Ptnml(0-2): 74, 13597, 33908, 13733, 65 https://tests.stockfishchess.org/tests/view/66e0c38686d5ee47d953a481 closes https://github.com/official-stockfish/Stockfish/pull/5594 Bench: 1646373 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 4f6e7511..135db0ce 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1365,7 +1365,7 @@ moves_loop: // When in check, search starts here } // Bonus when search fails low and there is a TT move - else if (moveCount > 1 && ttData.move && !allNode) + else if (ttData.move && !allNode) thisThread->mainHistory[us][ttData.move.from_to()] << stat_bonus(depth) / 4; if (PvNode) From 240a5b1c72af0c9fa7b2dd13d17cdef61415b4e6 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sat, 14 Sep 2024 08:22:32 +0300 Subject: [PATCH 250/315] Introduce separate butterfly history table for sorting root moves Idea of this patch comes from the fact that current history heuristics are mostly populated by low depth entries since our stat bonus reaches maximum value at depth 5-6 and number of low depth nodes is much bigger than number of high depth nodes. But it doesn't make a whole lot of sense to use these low-depth centered histories to sort moves at root.
Current patch introduces special history table that is used exclusively at root, it remembers which quiet moves were good and which quiet moves were not good there and uses this information for move ordering. Passed STC: https://tests.stockfishchess.org/tests/view/66dda74adc53972b68218cc9 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 127680 W: 33579 L: 33126 D: 60975 Ptnml(0-2): 422, 15098, 32391, 15463, 466 Passed LTC: https://tests.stockfishchess.org/tests/view/66dead2adc53972b68218d34 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 381978 W: 96958 L: 95923 D: 189097 Ptnml(0-2): 277, 42165, 105089, 43162, 296 closes https://github.com/official-stockfish/Stockfish/pull/5595 Bench: 1611283 --- src/movepick.cpp | 11 +++++++-- src/movepick.h | 6 ++++- src/search.cpp | 59 +++++++++++++++++++++++++++++++----------------- src/search.h | 1 + 4 files changed, 53 insertions(+), 24 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index bdc0e4af..63d9e8b1 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -82,16 +82,20 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, + const ButterflyHistory* rh, const CapturePieceToHistory* cph, const PieceToHistory** ch, - const PawnHistory* ph) : + const PawnHistory* ph, + bool rn) : pos(p), mainHistory(mh), + rootHistory(rh), captureHistory(cph), continuationHistory(ch), pawnHistory(ph), ttMove(ttm), - depth(d) { + depth(d), + rootNode(rn) { if (pos.checkers()) stage = EVASION_TT + !(ttm && pos.pseudo_legal(ttm)); @@ -174,6 +178,9 @@ void MovePicker::score() { m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 49000 : pt == ROOK ? 
bool(to & threatenedByMinor) * 24335 : bool(to & threatenedByPawn) * 14900); + + if (rootNode) + m.value += 4 * (*rootHistory)[pos.side_to_move()][m.from_to()]; } else // Type == EVASIONS diff --git a/src/movepick.h b/src/movepick.h index 651091b0..f66cdadf 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -171,9 +171,11 @@ class MovePicker { Move, Depth, const ButterflyHistory*, + const ButterflyHistory*, const CapturePieceToHistory*, const PieceToHistory**, - const PawnHistory*); + const PawnHistory*, + bool); MovePicker(const Position&, Move, int, const CapturePieceToHistory*); Move next_move(bool skipQuiets = false); @@ -187,6 +189,7 @@ class MovePicker { const Position& pos; const ButterflyHistory* mainHistory; + const ButterflyHistory* rootHistory; const CapturePieceToHistory* captureHistory; const PieceToHistory** continuationHistory; const PawnHistory* pawnHistory; @@ -195,6 +198,7 @@ class MovePicker { int stage; int threshold; Depth depth; + bool rootNode; ExtMove moves[MAX_MOVES]; }; diff --git a/src/search.cpp b/src/search.cpp index 135db0ce..3c6da163 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -101,16 +101,21 @@ Value value_to_tt(Value v, int ply); Value value_from_tt(Value v, int ply, int r50c); void update_pv(Move* pv, Move move, const Move* childPv); void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); -void update_quiet_histories( - const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); -void update_all_stats(const Position& pos, - Stack* ss, - Search::Worker& workerThread, - Move bestMove, - Square prevSq, - ValueList& quietsSearched, - ValueList& capturesSearched, - Depth depth); +void update_quiet_histories(const Position& pos, + Stack* ss, + Search::Worker& workerThread, + Move move, + int bonus, + bool rootNode); +void update_all_stats(const Position& pos, + Stack* ss, + Search::Worker& workerThread, + Move bestMove, + Square prevSq, + ValueList& quietsSearched, + ValueList& 
capturesSearched, + Depth depth, + bool rootNode); } // namespace @@ -264,6 +269,8 @@ void Search::Worker::iterative_deepening() { int searchAgainCounter = 0; + rootHistory.fill(0); + // Iterative deepening loop until requested to stop or the target depth is reached while (++rootDepth < MAX_PLY && !threads.stop && !(limits.depth && mainThread && rootDepth > limits.depth)) @@ -488,6 +495,7 @@ void Search::Worker::iterative_deepening() { // Reset histories, usually before a new game void Search::Worker::clear() { mainHistory.fill(0); + rootHistory.fill(0); captureHistory.fill(-753); pawnHistory.fill(-1152); pawnCorrectionHistory.fill(0); @@ -622,7 +630,7 @@ Value Search::Worker::search( { // Bonus for a quiet ttMove that fails high (~2 Elo) if (!ttCapture) - update_quiet_histories(pos, ss, *this, ttData.move, stat_bonus(depth)); + update_quiet_histories(pos, ss, *this, ttData.move, stat_bonus(depth), rootNode); // Extra penalty for early quiet moves of // the previous ply (~1 Elo on STC, ~2 Elo on LTC) @@ -912,8 +920,8 @@ moves_loop: // When in check, search starts here (ss - 6)->continuationHistory}; - MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->captureHistory, - contHist, &thisThread->pawnHistory); + MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->rootHistory, + &thisThread->captureHistory, contHist, &thisThread->pawnHistory, rootNode); value = bestValue; @@ -1339,7 +1347,8 @@ moves_loop: // When in check, search starts here // If there is a move that produces search value greater than alpha, // we update the stats of searched moves. 
else if (bestMove) - update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, capturesSearched, depth); + update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, capturesSearched, depth, + rootNode); // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) @@ -1533,8 +1542,9 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) // Initialize a MovePicker object for the current position, and prepare to search // the moves. We presently use two stages of move generator in quiescence search: // captures, or evasions only when in check. - MovePicker mp(pos, ttData.move, DEPTH_QS, &thisThread->mainHistory, &thisThread->captureHistory, - contHist, &thisThread->pawnHistory); + MovePicker mp(pos, ttData.move, DEPTH_QS, &thisThread->mainHistory, &thisThread->rootHistory, + &thisThread->captureHistory, contHist, &thisThread->pawnHistory, + nodeType == Root); // Step 5. Loop through all pseudo-legal moves until no moves remain or a beta // cutoff occurs. 
@@ -1751,7 +1761,8 @@ void update_all_stats(const Position& pos, Square prevSq, ValueList& quietsSearched, ValueList& capturesSearched, - Depth depth) { + Depth depth, + bool rootNode) { CapturePieceToHistory& captureHistory = workerThread.captureHistory; Piece moved_piece = pos.moved_piece(bestMove); @@ -1762,11 +1773,11 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - update_quiet_histories(pos, ss, workerThread, bestMove, quietMoveBonus); + update_quiet_histories(pos, ss, workerThread, bestMove, quietMoveBonus, rootNode); // Decrease stats for all non-best quiet moves for (Move move : quietsSearched) - update_quiet_histories(pos, ss, workerThread, move, -quietMoveMalus); + update_quiet_histories(pos, ss, workerThread, move, -quietMoveMalus, rootNode); } else { @@ -1808,11 +1819,17 @@ void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { // Updates move sorting heuristics -void update_quiet_histories( - const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { +void update_quiet_histories(const Position& pos, + Stack* ss, + Search::Worker& workerThread, + Move move, + int bonus, + bool rootNode) { Color us = pos.side_to_move(); workerThread.mainHistory[us][move.from_to()] << bonus; + if (rootNode) + workerThread.rootHistory[us][move.from_to()] << bonus; update_continuation_histories(ss, pos.moved_piece(move), move.to_sq(), bonus); diff --git a/src/search.h b/src/search.h index c9fe9e18..b06c7c94 100644 --- a/src/search.h +++ b/src/search.h @@ -278,6 +278,7 @@ class Worker { // Public because they need to be updatable by the stats ButterflyHistory mainHistory; + ButterflyHistory rootHistory; CapturePieceToHistory captureHistory; ContinuationHistory continuationHistory[2][2]; PawnHistory pawnHistory; From 60351b9df901ff5278f208a9cf3a40059ff54832 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Thu, 12 Sep 2024 15:53:15 -0700 Subject: [PATCH 251/315] Introduce Various Correction 
histories This patch introduces three additional correction histories, namely, Major Piece Correction History, Minor Piece Correction History, and Non-Pawn Correction History. Introduced by @mcthouacbb in Sirius (https://github.com/mcthouacbb/Sirius) chess engine. The Major Piece Correction History is indexed by side-to-move and the Zobrist key representing the position of the King, Rook, and Queen of both sides. Likewise, the Minor Piece Correction History is indexed by side-to-move and the Zobrist key representing the position of the King, Knight, and Bishop of both sides. Also See: https://github.com/mcthouacbb/Sirius/commit/97b85bbaac88ff5a0f63e28776027dd3de77164e https://github.com/mcthouacbb/Sirius/commit/3099cdef2f13e29805654b5f8153e6ecd5853195 Introduced by @zzzzz151 in Starzix (https://github.com/zzzzz151/Starzix) chess engine. Non-Pawn correction history consists of side-to-move, side of Zobrist key, and a Zobrist key representing the position of all non-pawn pieces of **one side**. The non-pawn correction values for both key sides are then summed. Also See: https://github.com/zzzzz151/Starzix/commit/34911772f178c27b3a239dda0acb79c397c3a2f0 https://github.com/zzzzz151/Starzix/commit/33e0df8dd2db1d4775974ab12e3390154697f47a The weights on the final correction value of the above correction histories, as well as existing correction histories, are then tuned in two separate SPSA sessions, totaling 75k games. SPSA1: https://tests.stockfishchess.org/tests/view/66e5243886d5ee47d953a86b (Stopped early due to some weights reaching the maximum value) SPSA2: https://tests.stockfishchess.org/tests/view/66e6a26f86d5ee47d953a965 Also thanks to @martinnovaak, (Motor https://github.com/martinnovaak/motor author) for insights and suggestions.
Passed STC: LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 23328 W: 6197 L: 5901 D: 11230 Ptnml(0-2): 82, 2582, 6041, 2876, 83 https://tests.stockfishchess.org/tests/view/66e8787b86d5ee47d953ab6f Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 10626 W: 2826 L: 2560 D: 5240 Ptnml(0-2): 4, 1054, 2941, 1300, 14 https://tests.stockfishchess.org/tests/view/66e8ab2386d5ee47d953aba8 closes https://github.com/official-stockfish/Stockfish/pull/5598 Bench: 1011161 --- src/bitboard.cpp | 4 +-- src/movepick.h | 40 ++++++++++++++++++++++++----- src/position.cpp | 66 ++++++++++++++++++++++++++++++++++++++++++++---- src/position.h | 12 +++++++++ src/search.cpp | 24 +++++++++++++----- src/search.h | 19 +++++++++----- tests/perft.sh | 2 +- 7 files changed, 140 insertions(+), 27 deletions(-) diff --git a/src/bitboard.cpp b/src/bitboard.cpp index c842ca12..a8b4e5f4 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -140,8 +140,8 @@ Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { // Computes all rook and bishop attacks at startup. Magic // bitboards are used to look up attacks of sliding pieces. As a reference see -// www.chessprogramming.org/Magic_Bitboards. In particular, here we use the so -// called "fancy" approach. +// https://www.chessprogramming.org/Magic_Bitboards. In particular, here we use +// the so called "fancy" approach. 
void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { // Optimal PRNG seeds to pick the correct magics in the shortest time diff --git a/src/movepick.h b/src/movepick.h index f66cdadf..13b9635b 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -34,10 +34,13 @@ namespace Stockfish { -constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 -constexpr int PAWN_CORRECTION_HISTORY_SIZE = 16384; // has to be a power of 2 -constexpr int MATERIAL_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 -constexpr int CORRECTION_HISTORY_LIMIT = 1024; +constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 +constexpr int PAWN_CORRECTION_HISTORY_SIZE = 16384; // has to be a power of 2 +constexpr int MATERIAL_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 +constexpr int MAJOR_PIECE_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 +constexpr int MINOR_PIECE_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 +constexpr int NON_PAWN_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 +constexpr int CORRECTION_HISTORY_LIMIT = 1024; static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0, "PAWN_HISTORY_SIZE has to be a power of 2"); @@ -59,6 +62,19 @@ inline int material_index(const Position& pos) { return pos.material_key() & (MATERIAL_CORRECTION_HISTORY_SIZE - 1); } +inline int major_piece_index(const Position& pos) { + return pos.major_piece_key() & (MAJOR_PIECE_CORRECTION_HISTORY_SIZE - 1); +} + +inline int minor_piece_index(const Position& pos) { + return pos.minor_piece_key() & (MINOR_PIECE_CORRECTION_HISTORY_SIZE - 1); +} + +template +inline int non_pawn_index(const Position& pos) { + return pos.non_pawn_key(c) & (NON_PAWN_CORRECTION_HISTORY_SIZE - 1); +} + // StatsEntry stores the stat table value. It is usually a number but could // be a move or even a nested history. 
We use a class instead of a naked value // to directly call history update operator<<() on the entry so to use stats @@ -120,7 +136,7 @@ enum StatsType { // ButterflyHistory records how often quiet moves have been successful or unsuccessful // during the current search, and is used for reduction and move ordering decisions. // It uses 2 tables (one for each color) indexed by the move's from and to squares, -// see www.chessprogramming.org/Butterfly_Boards (~11 elo) +// see https://www.chessprogramming.org/Butterfly_Boards (~11 elo) using ButterflyHistory = Stats; // CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] @@ -138,10 +154,10 @@ using ContinuationHistory = Stats // PawnHistory is addressed by the pawn structure and a move's [piece][to] using PawnHistory = Stats; - // Correction histories record differences between the static evaluation of // positions and their search score. It is used to improve the static evaluation // used by some search heuristics. +// see https://www.chessprogramming.org/Static_Evaluation_Correction_History // PawnCorrectionHistory is addressed by color and pawn structure using PawnCorrectionHistory = @@ -151,6 +167,18 @@ using PawnCorrectionHistory = using MaterialCorrectionHistory = Stats; +// MajorPieceCorrectionHistory is addressed by color and king/major piece (Queen, Rook) positions +using MajorPieceCorrectionHistory = + Stats; + +// MinorPieceCorrectionHistory is addressed by color and king/minor piece (Knight, Bishop) positions +using MinorPieceCorrectionHistory = + Stats; + +// NonPawnCorrectionHistory is addressed by color and non-pawn material positions +using NonPawnCorrectionHistory = + Stats; + // The MovePicker class is used to pick one pseudo-legal move at a time from the // current position. 
The most important method is next_move(), which emits one // new pseudo-legal move on every call, until there are no moves left, when diff --git a/src/position.cpp b/src/position.cpp index df95ffef..f596b015 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -334,8 +334,10 @@ void Position::set_check_info() const { // The function is only used when a new position is set up void Position::set_state() const { - st->key = st->materialKey = 0; - st->pawnKey = Zobrist::noPawns; + st->key = st->materialKey = 0; + st->majorPieceKey = st->minorPieceKey = 0; + st->nonPawnKey[WHITE] = st->nonPawnKey[BLACK] = 0; + st->pawnKey = Zobrist::noPawns; st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO; st->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); @@ -350,8 +352,27 @@ void Position::set_state() const { if (type_of(pc) == PAWN) st->pawnKey ^= Zobrist::psq[pc][s]; - else if (type_of(pc) != KING) - st->nonPawnMaterial[color_of(pc)] += PieceValue[pc]; + else + { + st->nonPawnKey[color_of(pc)] ^= Zobrist::psq[pc][s]; + + if (type_of(pc) != KING) + { + st->nonPawnMaterial[color_of(pc)] += PieceValue[pc]; + + if (type_of(pc) == QUEEN || type_of(pc) == ROOK) + st->majorPieceKey ^= Zobrist::psq[pc][s]; + + else + st->minorPieceKey ^= Zobrist::psq[pc][s]; + } + + else + { + st->majorPieceKey ^= Zobrist::psq[pc][s]; + st->minorPieceKey ^= Zobrist::psq[pc][s]; + } + } } if (st->epSquare != SQ_NONE) @@ -707,6 +728,8 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { do_castling(us, from, to, rfrom, rto); k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; + st->majorPieceKey ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; + st->nonPawnKey[us] ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; captured = NO_PIECE; } @@ -732,7 +755,16 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { st->pawnKey ^= Zobrist::psq[captured][capsq]; } else + { st->nonPawnMaterial[them] 
-= PieceValue[captured]; + st->nonPawnKey[them] ^= Zobrist::psq[captured][capsq]; + + if (type_of(pc) == QUEEN || type_of(pc) == ROOK) + st->majorPieceKey ^= Zobrist::psq[captured][capsq]; + + else + st->minorPieceKey ^= Zobrist::psq[captured][capsq]; + } dp.dirty_num = 2; // 1 piece moved, 1 piece captured dp.piece[1] = captured; @@ -790,7 +822,8 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { else if (m.type_of() == PROMOTION) { - Piece promotion = make_piece(us, m.promotion_type()); + Piece promotion = make_piece(us, m.promotion_type()); + PieceType promotionType = type_of(promotion); assert(relative_rank(us, to) == RANK_8); assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN); @@ -811,6 +844,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { st->materialKey ^= Zobrist::psq[promotion][pieceCount[promotion] - 1] ^ Zobrist::psq[pc][pieceCount[pc]]; + if (promotionType == QUEEN || promotionType == ROOK) + st->majorPieceKey ^= Zobrist::psq[promotion][to]; + + else + st->minorPieceKey ^= Zobrist::psq[promotion][to]; + // Update material st->nonPawnMaterial[us] += PieceValue[promotion]; } @@ -822,6 +861,23 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { st->rule50 = 0; } + else + { + st->nonPawnKey[us] ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + + if (type_of(pc) == KING) + { + st->majorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + st->minorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + } + + else if (type_of(pc) == QUEEN || type_of(pc) == ROOK) + st->majorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + + else + st->minorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + } + // Set capture piece st->capturedPiece = captured; diff --git a/src/position.h b/src/position.h index 6cac1731..888612da 100644 --- a/src/position.h +++ b/src/position.h @@ -43,6 +43,9 @@ struct StateInfo { // Copied when making a move Key materialKey; Key 
pawnKey; + Key majorPieceKey; + Key minorPieceKey; + Key nonPawnKey[COLOR_NB]; Value nonPawnMaterial[COLOR_NB]; int castlingRights; int rule50; @@ -151,6 +154,9 @@ class Position { Key key_after(Move m) const; Key material_key() const; Key pawn_key() const; + Key major_piece_key() const; + Key minor_piece_key() const; + Key non_pawn_key(Color c) const; // Other properties of the position Color side_to_move() const; @@ -298,6 +304,12 @@ inline Key Position::pawn_key() const { return st->pawnKey; } inline Key Position::material_key() const { return st->materialKey; } +inline Key Position::major_piece_key() const { return st->majorPieceKey; } + +inline Key Position::minor_piece_key() const { return st->minorPieceKey; } + +inline Key Position::non_pawn_key(Color c) const { return st->nonPawnKey[c]; } + inline Value Position::non_pawn_material(Color c) const { return st->nonPawnMaterial[c]; } inline Value Position::non_pawn_material() const { diff --git a/src/search.cpp b/src/search.cpp index 3c6da163..199b9355 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -46,7 +46,6 @@ #include "thread.h" #include "timeman.h" #include "tt.h" -#include "types.h" #include "uci.h" #include "ucioption.h" @@ -81,11 +80,16 @@ constexpr int futility_move_count(bool improving, Depth depth) { // Add correctionHistory value to raw staticEval and guarantee evaluation // does not hit the tablebase range. 
Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { - const auto pcv = - w.pawnCorrectionHistory[pos.side_to_move()][pawn_structure_index(pos)]; - const auto mcv = w.materialCorrectionHistory[pos.side_to_move()][material_index(pos)]; - const auto cv = (2 * pcv + mcv) / 3; - v += 74 * cv / 512; + const Color us = pos.side_to_move(); + const auto pcv = w.pawnCorrectionHistory[us][pawn_structure_index(pos)]; + const auto mcv = w.materialCorrectionHistory[us][material_index(pos)]; + const auto macv = w.majorPieceCorrectionHistory[us][major_piece_index(pos)]; + const auto micv = w.minorPieceCorrectionHistory[us][minor_piece_index(pos)]; + const auto wnpcv = w.nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)]; + const auto bnpcv = w.nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)]; + const auto cv = + (98198 * pcv + 68968 * mcv + 54353 * macv + 85174 * micv + 85581 * (wnpcv + bnpcv)) / 2097152; + v += cv; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } @@ -500,6 +504,10 @@ void Search::Worker::clear() { pawnHistory.fill(-1152); pawnCorrectionHistory.fill(0); materialCorrectionHistory.fill(0); + majorPieceCorrectionHistory.fill(0); + minorPieceCorrectionHistory.fill(0); + nonPawnCorrectionHistory[WHITE].fill(0); + nonPawnCorrectionHistory[BLACK].fill(0); for (bool inCheck : {false, true}) for (StatsType c : {NoCaptures, Captures}) @@ -1403,6 +1411,10 @@ moves_loop: // When in check, search starts here -CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4); thisThread->pawnCorrectionHistory[us][pawn_structure_index(pos)] << bonus; thisThread->materialCorrectionHistory[us][material_index(pos)] << bonus; + thisThread->majorPieceCorrectionHistory[us][major_piece_index(pos)] << bonus; + thisThread->minorPieceCorrectionHistory[us][minor_piece_index(pos)] << bonus; + thisThread->nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)] << bonus; + 
thisThread->nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)] << bonus; } assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); diff --git a/src/search.h b/src/search.h index b06c7c94..d7a909a8 100644 --- a/src/search.h +++ b/src/search.h @@ -277,13 +277,18 @@ class Worker { void ensure_network_replicated(); // Public because they need to be updatable by the stats - ButterflyHistory mainHistory; - ButterflyHistory rootHistory; - CapturePieceToHistory captureHistory; - ContinuationHistory continuationHistory[2][2]; - PawnHistory pawnHistory; - PawnCorrectionHistory pawnCorrectionHistory; - MaterialCorrectionHistory materialCorrectionHistory; + ButterflyHistory mainHistory; + ButterflyHistory rootHistory; + + CapturePieceToHistory captureHistory; + ContinuationHistory continuationHistory[2][2]; + PawnHistory pawnHistory; + + PawnCorrectionHistory pawnCorrectionHistory; + MaterialCorrectionHistory materialCorrectionHistory; + MajorPieceCorrectionHistory majorPieceCorrectionHistory; + MinorPieceCorrectionHistory minorPieceCorrectionHistory; + NonPawnCorrectionHistory nonPawnCorrectionHistory[COLOR_NB]; private: void iterative_deepening(); diff --git a/tests/perft.sh b/tests/perft.sh index 545e750f..c1532c20 100755 --- a/tests/perft.sh +++ b/tests/perft.sh @@ -1,5 +1,5 @@ #!/bin/bash -# verify perft numbers (positions from www.chessprogramming.org/Perft_Results) +# verify perft numbers (positions from https://www.chessprogramming.org/Perft_Results) error() { From 93869d5d0aab2f7121bdf227def3a942c9fcde17 Mon Sep 17 00:00:00 2001 From: Wencey Wang Date: Thu, 19 Sep 2024 16:30:28 +0800 Subject: [PATCH 252/315] Fix native arch builds on loongarch64 Adds support for LSX and LASX closes https://github.com/official-stockfish/Stockfish/pull/5600 No functional change --- AUTHORS | 1 + scripts/get_native_properties.sh | 15 ++++++++++++ src/Makefile | 42 +++++++++++++++++++++++++++++--- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/AUTHORS 
b/AUTHORS index 3201e7a8..c0a8beeb 100644 --- a/AUTHORS +++ b/AUTHORS @@ -237,6 +237,7 @@ Unai Corzo (unaiic) Uri Blass (uriblass) Vince Negri (cuddlestmonkey) Viren +Wencey Wang windfishballad xefoci7612 Xiang Wang (KatyushaScarlet) diff --git a/scripts/get_native_properties.sh b/scripts/get_native_properties.sh index fb124021..dfbfac0e 100755 --- a/scripts/get_native_properties.sh +++ b/scripts/get_native_properties.sh @@ -26,6 +26,17 @@ check_znver_1_2() { [ "$vendor_id" = "AuthenticAMD" ] && [ "$cpu_family" = "23" ] && znver_1_2=true } +# Set the file CPU loongarch64 architecture +set_arch_loongarch64() { + if check_flags 'lasx'; then + true_arch='loongarch64-lasx' + elif check_flags 'lsx'; then + true_arch='loongarch64-lsx' + else + true_arch='loongarch64' + fi +} + # Set the file CPU x86_64 architecture set_arch_x86_64() { if check_flags 'avx512vnni' 'avx512dq' 'avx512f' 'avx512bw' 'avx512vl'; then @@ -90,6 +101,10 @@ case $uname_s in true_arch="$true_arch-neon" fi ;; + 'loongarch64'*) + file_os='linux' + set_arch_loongarch64 + ;; *) # Unsupported machine type, exit with error printf 'Unsupported machine type: %s\n' "$uname_m" exit 1 diff --git a/src/Makefile b/src/Makefile index 042d9479..6cb778a6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -100,6 +100,8 @@ VPATH = syzygy:nnue:nnue/features # vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # dotprod = yes/no --- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions +# lsx = yes/no --- -mlsx --- Use Loongson SIMD eXtension +# lasx = yes/no --- -mlasx --- Use Loongson Advanced SIMD eXtension # # Note that Makefile is space sensitive, so when adding new architectures # or modifying existing flags, you have to make sure there are no extra spaces @@ -125,7 +127,8 @@ ifeq ($(ARCH), $(filter $(ARCH), \ x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-avxvnni x86-64-bmi2 \ x86-64-avx2
x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 e2k \ - armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64 loongarch64)) + armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64 \ + loongarch64 loongarch64-lsx loongarch64-lasx)) SUPPORTED_ARCH=true else SUPPORTED_ARCH=false @@ -151,6 +154,8 @@ vnni512 = no neon = no dotprod = no arm_version = 0 +lsx = no +lasx = no STRIP = strip ifneq ($(shell which clang-format-18 2> /dev/null),) @@ -370,8 +375,19 @@ ifeq ($(ARCH),riscv64) arch = riscv64 endif -ifeq ($(ARCH),loongarch64) +ifeq ($(findstring loongarch64,$(ARCH)),loongarch64) arch = loongarch64 + prefetch = yes + +ifeq ($(findstring -lasx,$(ARCH)),-lasx) + lsx = yes + lasx = yes +endif + +ifeq ($(findstring -lsx,$(ARCH)),-lsx) + lsx = yes +endif + endif endif @@ -408,7 +424,7 @@ ifeq ($(COMP),gcc) ifeq ($(ARCH),riscv64) CXXFLAGS += -latomic endif - else ifeq ($(ARCH),loongarch64) + else ifeq ($(arch),loongarch64) CXXFLAGS += -latomic else CXXFLAGS += -m$(bits) @@ -480,7 +496,7 @@ ifeq ($(COMP),clang) ifeq ($(ARCH),riscv64) CXXFLAGS += -latomic endif - else ifeq ($(ARCH),loongarch64) + else ifeq ($(arch),loongarch64) CXXFLAGS += -latomic else CXXFLAGS += -m$(bits) @@ -719,6 +735,18 @@ ifeq ($(dotprod),yes) CXXFLAGS += -march=armv8.2-a+dotprod -DUSE_NEON_DOTPROD endif +ifeq ($(lasx),yes) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) + CXXFLAGS += -mlasx + endif +endif + +ifeq ($(lsx),yes) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) + CXXFLAGS += -mlsx + endif +endif + ### 3.7 pext ifeq ($(pext),yes) CXXFLAGS += -DUSE_PEXT @@ -835,6 +863,8 @@ help: @echo "general-32 > unspecified 32-bit" @echo "riscv64 > RISC-V 64-bit" @echo "loongarch64 > LoongArch 64-bit" + @echo "loongarch64-lsx > LoongArch 64-bit with SIMD eXtension" + @echo "loongarch64-lasx > LoongArch 64-bit with Advanced SIMD eXtension" @echo "" @echo 
"Supported compilers:" @echo "" @@ -960,6 +990,8 @@ config-sanity: net @echo "neon: '$(neon)'" @echo "dotprod: '$(dotprod)'" @echo "arm_version: '$(arm_version)'" + @echo "lsx: '$(lsx)'" + @echo "lasx: '$(lasx)'" @echo "target_windows: '$(target_windows)'" @echo "" @echo "Flags:" @@ -989,6 +1021,8 @@ config-sanity: net @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no" + @test "$(lsx)" = "yes" || test "$(lsx)" = "no" + @test "$(lasx)" = "yes" || test "$(lasx)" = "no" @test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ || test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" From 5d0bb5976ef2da06a6386d0f5cad2f755e9b0927 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Thu, 19 Sep 2024 15:03:07 +0300 Subject: [PATCH 253/315] Removed ROOK threatenedByPawn Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 56608 W: 14788 L: 14588 D: 27232 Ptnml(0-2): 162, 6763, 14313, 6845, 221 https://tests.stockfishchess.org/tests/view/66e83f9c86d5ee47d953ab1d Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 175758 W: 44501 L: 44438 D: 86819 Ptnml(0-2): 125, 19489, 48601, 19526, 138 https://tests.stockfishchess.org/tests/view/66e882d486d5ee47d953ab8a closes https://github.com/official-stockfish/Stockfish/pull/5601 bench: 1241271 --- src/movepick.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 63d9e8b1..f4ef0e54 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -175,9 +175,9 @@ void MovePicker::score() { : 0; // malus for putting piece en prise - m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 49000 - : pt == ROOK ? bool(to & threatenedByMinor) * 24335 - : bool(to & threatenedByPawn) * 14900); + m.value -= (pt == QUEEN ? 
bool(to & threatenedByRook) * 49000 + : pt == ROOK && bool(to & threatenedByMinor) ? 24335 + : 0); if (rootNode) m.value += 4 * (*rootHistory)[pos.side_to_move()][m.from_to()]; From ae420e735f378bbb675dcf47598a5204f008cdd5 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 18 Sep 2024 06:26:20 +0200 Subject: [PATCH 254/315] Tweak Correction histories tune parameters some more, adjust scores updated for each history passed STC: https://tests.stockfishchess.org/tests/view/66ea569186d5ee47d953ae48 LLR: 2.92 (-2.94,2.94) <0.00,2.00> Total: 36288 W: 9660 L: 9344 D: 17284 Ptnml(0-2): 110, 4207, 9220, 4471, 136 passed LTC: https://tests.stockfishchess.org/tests/view/66ea9b4e86d5ee47d953ae6f LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 241446 W: 61748 L: 61010 D: 118688 Ptnml(0-2): 173, 26211, 67202, 26979, 158 closes https://github.com/official-stockfish/Stockfish/pull/5606 Bench: 1677953 --- src/search.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 199b9355..229aef9b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -88,7 +88,8 @@ Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { const auto wnpcv = w.nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)]; const auto bnpcv = w.nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)]; const auto cv = - (98198 * pcv + 68968 * mcv + 54353 * macv + 85174 * micv + 85581 * (wnpcv + bnpcv)) / 2097152; + (99916 * pcv + 55067 * mcv + 55530 * macv + 95324 * micv + 105056 * (wnpcv + bnpcv)) + / 2097152; v += cv; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } @@ -1409,12 +1410,15 @@ moves_loop: // When in check, search starts here { auto bonus = std::clamp(int(bestValue - ss->staticEval) * depth / 8, -CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4); - thisThread->pawnCorrectionHistory[us][pawn_structure_index(pos)] << bonus; - 
thisThread->materialCorrectionHistory[us][material_index(pos)] << bonus; - thisThread->majorPieceCorrectionHistory[us][major_piece_index(pos)] << bonus; - thisThread->minorPieceCorrectionHistory[us][minor_piece_index(pos)] << bonus; - thisThread->nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)] << bonus; - thisThread->nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)] << bonus; + thisThread->pawnCorrectionHistory[us][pawn_structure_index(pos)] + << bonus * 101 / 128; + thisThread->materialCorrectionHistory[us][material_index(pos)] << bonus * 99 / 128; + thisThread->majorPieceCorrectionHistory[us][major_piece_index(pos)] << bonus * 157 / 128; + thisThread->minorPieceCorrectionHistory[us][minor_piece_index(pos)] << bonus * 153 / 128; + thisThread->nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)] + << bonus * 123 / 128; + thisThread->nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)] + << bonus * 165 / 128; } assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); From aff1f67997cd2584ea7c82d967ac7bfd4cc77861 Mon Sep 17 00:00:00 2001 From: Nonlinear2 <131959792+Nonlinear2@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:17:24 +0200 Subject: [PATCH 255/315] simplify see pruning in qsearch passed non-regression STC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 34880 W: 9193 L: 8968 D: 16719 Ptnml(0-2): 103, 4047, 8935, 4232, 123 https://tests.stockfishchess.org/tests/view/66ee83bd86d5ee47d953b15b passed non-regression LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 69126 W: 17529 L: 17357 D: 34240 Ptnml(0-2): 41, 7507, 19285, 7699, 31 https://tests.stockfishchess.org/tests/view/66ef3e0386d5ee47d953b1d3 closes https://github.com/official-stockfish/Stockfish/pull/5607 Bench: 1339840 --- src/search.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 229aef9b..d87a6b9a 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1596,19 +1596,11 @@ Value 
Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) continue; } - // If static eval is much lower than alpha and move is - // not winning material, we can prune this move. (~2 Elo) - if (futilityBase <= alpha && !pos.see_ge(move, 1)) + // if static exchange evaluation is low enough + // we can prune this move. (~2 Elo) + if (!pos.see_ge(move, alpha - futilityBase)) { - bestValue = std::max(bestValue, futilityBase); - continue; - } - - // If static exchange evaluation is much worse than what - // is needed to not fall below alpha, we can prune this move. - if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 4)) - { - bestValue = alpha; + bestValue = (futilityBase > alpha) ? alpha : std::max(bestValue, futilityBase); continue; } } From 3ac75cd27d914da29280163c9d391bbca414d766 Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Tue, 4 Jun 2024 17:23:56 +0200 Subject: [PATCH 256/315] Add a standardized benchmark command `speedtest`. `speedtest [threads] [hash_MiB] [time_s]`. `threads` defaults to system concurrency. `hash_MiB` defaults to `threads*128`. `time_s` defaults to 150. Intended to be used with default parameters, as a stable hardware benchmark.
Example: ``` C:\dev\stockfish-master\src>stockfish.exe speedtest Stockfish dev-20240928-nogit by the Stockfish developers (see AUTHORS file) info string Using 16 threads Warmup position 3/3 Position 258/258 =========================== Version : Stockfish dev-20240928-nogit Compiled by : g++ (GNUC) 13.2.0 on MinGW64 Compilation architecture : x86-64-vnni256 Compilation settings : 64bit VNNI BMI2 AVX2 SSE41 SSSE3 SSE2 POPCNT Compiler __VERSION__ macro : 13.2.0 Large pages : yes User invocation : speedtest Filled invocation : speedtest 16 2048 150 Available processors : 0-15 Thread count : 16 Thread binding : none TT size [MiB] : 2048 Hash max, avg [per mille] : single search : 40, 21 single game : 631, 428 Total nodes searched : 2099917842 Total search time [s] : 153.937 Nodes/second : 13641410 ``` ------------------------------- Small unrelated tweaks: - Network verification output is now handled as a callback. - TT hashfull queries allow specifying maximum entry age. closes https://github.com/official-stockfish/Stockfish/pull/5354 No functional change --- src/benchmark.cpp | 349 +++++++++++++++++++++++++++++++++++++++++++ src/benchmark.h | 10 ++ src/engine.cpp | 37 +++-- src/engine.h | 7 +- src/memory.cpp | 31 ++++ src/memory.h | 2 + src/misc.cpp | 11 +- src/misc.h | 8 +- src/nnue/network.cpp | 49 +++--- src/nnue/network.h | 4 +- src/numa.h | 11 +- src/tt.cpp | 15 +- src/tt.h | 2 +- src/uci.cpp | 174 ++++++++++++++++++++- src/uci.h | 5 +- 15 files changed, 663 insertions(+), 52 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 3622ac8a..35ad3c18 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -17,6 +17,7 @@ */ #include "benchmark.h" +#include "numa.h" #include #include @@ -91,6 +92,282 @@ const std::vector Defaults = { }; // clang-format on +// clang-format off +// human-randomly picked 5 games with <60 moves from +// https://tests.stockfishchess.org/tests/view/665c71f9fd45fb0f907c21e0 +// only moves for one side +const std::vector> 
BenchmarkPositions = { + { + "rnbq1k1r/ppp1bppp/4pn2/8/2B5/2NP1N2/PPP2PPP/R1BQR1K1 b - - 2 8", + "rnbq1k1r/pp2bppp/4pn2/2p5/2B2B2/2NP1N2/PPP2PPP/R2QR1K1 b - - 1 9", + "r1bq1k1r/pp2bppp/2n1pn2/2p5/2B1NB2/3P1N2/PPP2PPP/R2QR1K1 b - - 3 10", + "r1bq1k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/R2QR1K1 b - - 0 11", + "r1b2k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/3RR1K1 b - - 0 12", + "r1b1k2r/pp2bppp/2n1p3/2p5/2B1PB2/2P2N2/PP3PPP/3RR1K1 b - - 0 13", + "r1b1k2r/1p2bppp/p1n1p3/2p5/4PB2/2P2N2/PP2BPPP/3RR1K1 b - - 1 14", + "r1b1k2r/4bppp/p1n1p3/1pp5/P3PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 15", + "r1b1k2r/4bppp/p1n1p3/1P6/2p1PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 16", + "r1b1k2r/4bppp/2n1p3/1p6/2p1PB2/1PP2N2/4BPPP/3RR1K1 b - - 0 17", + "r3k2r/3bbppp/2n1p3/1p6/2P1PB2/2P2N2/4BPPP/3RR1K1 b - - 0 18", + "r3k2r/3bbppp/2n1p3/8/1pP1P3/2P2N2/3BBPPP/3RR1K1 b - - 1 19", + "1r2k2r/3bbppp/2n1p3/8/1pPNP3/2P5/3BBPPP/3RR1K1 b - - 3 20", + "1r2k2r/3bbppp/2n1p3/8/2PNP3/2B5/4BPPP/3RR1K1 b - - 0 21", + "1r2k2r/3bb1pp/2n1pp2/1N6/2P1P3/2B5/4BPPP/3RR1K1 b - - 1 22", + "1r2k2r/3b2pp/2n1pp2/1N6/1BP1P3/8/4BPPP/3RR1K1 b - - 0 23", + "1r2k2r/3b2pp/4pp2/1N6/1nP1P3/8/3RBPPP/4R1K1 b - - 1 24", + "1r5r/3bk1pp/4pp2/1N6/1nP1PP2/8/3RB1PP/4R1K1 b - - 0 25", + "1r5r/3bk1pp/2n1pp2/1N6/2P1PP2/8/3RBKPP/4R3 b - - 2 26", + "1r5r/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/4R3 b - - 0 27", + "1r1r4/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/R7 b - - 2 28", + "1r1r4/N3k1pp/2n1bp2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 4 29", + "1r1r4/3bk1pp/2N2p2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 0 30", + "1r1R4/4k1pp/2b2p2/4p3/2P1PP2/6P1/4BK1P/R7 b - - 0 31", + "3r4/4k1pp/2b2p2/4P3/2P1P3/6P1/4BK1P/R7 b - - 0 32", + "3r4/R3k1pp/2b5/4p3/2P1P3/6P1/4BK1P/8 b - - 1 33", + "8/3rk1pp/2b5/R3p3/2P1P3/6P1/4BK1P/8 b - - 3 34", + "8/3r2pp/2bk4/R1P1p3/4P3/6P1/4BK1P/8 b - - 0 35", + "8/2kr2pp/2b5/R1P1p3/4P3/4K1P1/4B2P/8 b - - 2 36", + "1k6/3r2pp/2b5/RBP1p3/4P3/4K1P1/7P/8 b - - 4 37", + "8/1k1r2pp/2b5/R1P1p3/4P3/3BK1P1/7P/8 b - - 6 38", + "1k6/3r2pp/2b5/2P1p3/4P3/3BK1P1/7P/R7 b - - 8 
39", + "1k6/r5pp/2b5/2P1p3/4P3/3BK1P1/7P/5R2 b - - 10 40", + "1k3R2/6pp/2b5/2P1p3/4P3/r2BK1P1/7P/8 b - - 12 41", + "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 14 42", + "5R2/2k3pp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 16 43", + "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 18 44", + "5R2/2k3pp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 20 45", + "8/2k2Rpp/2b5/2P1p3/4P3/r2B1KP1/7P/8 b - - 22 46", + "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 24 47", + "3k4/5Rpp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 26 48", + "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 28 49", + "3k4/5Rpp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 30 50", + "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 32 51", + "3k4/5Rpp/2b5/2P1p3/4P3/2KB2P1/r6P/8 b - - 34 52", + "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/2K4P/8 b - - 36 53", + "3k4/5Rpp/2b5/2P1p3/4P3/1K1B2P1/r6P/8 b - - 38 54", + "3k4/6Rp/2b5/2P1p3/4P3/1K1B2P1/7r/8 b - - 0 55", + "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 1 56", + "8/2k3R1/2b4p/2P1p3/4P3/1K1B2P1/7r/8 b - - 3 57", + "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 5 58", + "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/7r/8 b - - 7 59", + "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 9 60", + "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/6r1/8 b - - 11 61", + "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 13 62", + "8/2k5/2b3Rp/2P1p3/2K1P3/3B2P1/6r1/8 b - - 15 63", + "4b3/2k3R1/7p/2P1p3/2K1P3/3B2P1/6r1/8 b - - 17 64", + }, + { + "r1bqkbnr/npp1pppp/p7/3P4/4pB2/2N5/PPP2PPP/R2QKBNR w KQkq - 1 6", + "r1bqkb1r/npp1pppp/p4n2/3P4/4pB2/2N5/PPP1QPPP/R3KBNR w KQkq - 3 7", + "r2qkb1r/npp1pppp/p4n2/3P1b2/4pB2/2N5/PPP1QPPP/2KR1BNR w kq - 5 8", + "r2qkb1r/1pp1pppp/p4n2/1n1P1b2/4pB2/2N4P/PPP1QPP1/2KR1BNR w kq - 1 9", + "r2qkb1r/1pp1pppp/5n2/1p1P1b2/4pB2/7P/PPP1QPP1/2KR1BNR w kq - 0 10", + "r2qkb1r/1ppbpppp/5n2/1Q1P4/4pB2/7P/PPP2PP1/2KR1BNR w kq - 1 11", + "3qkb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/2KR1BNR w k - 0 12", + "q3kb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/1K1R1BNR w k - 2 13", + "r3kb1r/2pbpppp/5n2/3P4/4pB2/7P/1PP2PP1/1K1R1BNR w k - 0 14", + 
"r3kb1r/2Bb1ppp/4pn2/3P4/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 15", + "r3kb1r/2Bb2pp/4pn2/8/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 16", + "r3k2r/2Bb2pp/4pn2/2b5/4p3/7P/1PP1NPP1/1K1R1B1R w k - 2 17", + "r6r/2Bbk1pp/4pn2/2b5/3Np3/7P/1PP2PP1/1K1R1B1R w - - 4 18", + "r6r/b2bk1pp/4pn2/4B3/3Np3/7P/1PP2PP1/1K1R1B1R w - - 6 19", + "r1r5/b2bk1pp/4pn2/4B3/2BNp3/7P/1PP2PP1/1K1R3R w - - 8 20", + "r7/b2bk1pp/4pn2/2r1B3/2BNp3/1P5P/2P2PP1/1K1R3R w - - 1 21", + "rb6/3bk1pp/4pn2/2r1B3/2BNpP2/1P5P/2P3P1/1K1R3R w - - 1 22", + "1r6/3bk1pp/4pn2/2r5/2BNpP2/1P5P/2P3P1/1K1R3R w - - 0 23", + "1r6/3bk1p1/4pn1p/2r5/2BNpP2/1P5P/2P3P1/2KR3R w - - 0 24", + "8/3bk1p1/1r2pn1p/2r5/2BNpP1P/1P6/2P3P1/2KR3R w - - 1 25", + "8/3bk3/1r2pnpp/2r5/2BNpP1P/1P6/2P3P1/2K1R2R w - - 0 26", + "2b5/4k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R2R w - - 1 27", + "8/1b2k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R1R1 w - - 3 28", + "8/1b1nk3/1r2p1pp/2r5/2BNpPPP/1P6/2P5/2K1R1R1 w - - 1 29", + "8/1b2k3/1r2p1pp/2r1nP2/2BNp1PP/1P6/2P5/2K1R1R1 w - - 1 30", + "8/1b2k3/1r2p1p1/2r1nPp1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 31", + "8/1b2k3/1r2p1n1/2r3p1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 32", + "8/1b2k3/1r2p1n1/6r1/2BNp2P/1P6/2P5/2K1R3 w - - 0 33", + "8/1b2k3/1r2p3/4n1P1/2BNp3/1P6/2P5/2K1R3 w - - 1 34", + "8/1b2k3/1r2p3/4n1P1/2BN4/1P2p3/2P5/2K4R w - - 0 35", + "8/1b2k3/1r2p2R/6P1/2nN4/1P2p3/2P5/2K5 w - - 0 36", + "8/1b2k3/3rp2R/6P1/2PN4/4p3/2P5/2K5 w - - 1 37", + "8/4k3/3rp2R/6P1/2PN4/2P1p3/6b1/2K5 w - - 1 38", + "8/4k3/r3p2R/2P3P1/3N4/2P1p3/6b1/2K5 w - - 1 39", + "8/3k4/r3p2R/2P2NP1/8/2P1p3/6b1/2K5 w - - 3 40", + "8/3k4/4p2R/2P3P1/8/2P1N3/6b1/r1K5 w - - 1 41", + "8/3k4/4p2R/2P3P1/8/2P1N3/3K2b1/6r1 w - - 3 42", + "8/3k4/4p2R/2P3P1/8/2PKNb2/8/6r1 w - - 5 43", + "8/4k3/4p1R1/2P3P1/8/2PKNb2/8/6r1 w - - 7 44", + "8/4k3/4p1R1/2P3P1/3K4/2P1N3/8/6rb w - - 9 45", + "8/3k4/4p1R1/2P1K1P1/8/2P1N3/8/6rb w - - 11 46", + "8/3k4/4p1R1/2P3P1/5K2/2P1N3/8/4r2b w - - 13 47", + "8/3k4/2b1p2R/2P3P1/5K2/2P1N3/8/4r3 w - - 15 48", + "8/3k4/2b1p3/2P3P1/5K2/2P1N2R/8/6r1 w - - 17 49", + 
"2k5/7R/2b1p3/2P3P1/5K2/2P1N3/8/6r1 w - - 19 50", + "2k5/7R/4p3/2P3P1/b1P2K2/4N3/8/6r1 w - - 1 51", + "2k5/3bR3/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 3 52", + "3k4/3b2R1/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 5 53", + "3kb3/6R1/4p1P1/2P5/2P2K2/4N3/8/6r1 w - - 1 54", + "3kb3/6R1/4p1P1/2P5/2P2KN1/8/8/2r5 w - - 3 55", + "3kb3/6R1/4p1P1/2P1N3/2P2K2/8/8/5r2 w - - 5 56", + "3kb3/6R1/4p1P1/2P1N3/2P5/4K3/8/4r3 w - - 7 57", + }, + { + "rnbq1rk1/ppp1npb1/4p1p1/3P3p/3PP3/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 8", + "rnbq1rk1/ppp1npb1/6p1/3pP2p/3P4/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 9", + "rn1q1rk1/ppp1npb1/6p1/3pP2p/3P2b1/2N2N2/PP2BPPP/R1BQR1K1 b - - 2 10", + "r2q1rk1/ppp1npb1/2n3p1/3pP2p/3P2bN/2N5/PP2BPPP/R1BQR1K1 b - - 4 11", + "r4rk1/pppqnpb1/2n3p1/3pP2p/3P2bN/2N4P/PP2BPP1/R1BQR1K1 b - - 0 12", + "r4rk1/pppqnpb1/2n3p1/3pP2p/3P3N/7P/PP2NPP1/R1BQR1K1 b - - 0 13", + "r4rk1/pppq1pb1/2n3p1/3pPN1p/3P4/7P/PP2NPP1/R1BQR1K1 b - - 0 14", + "r4rk1/ppp2pb1/2n3p1/3pPq1p/3P1N2/7P/PP3PP1/R1BQR1K1 b - - 1 15", + "r4rk1/pppq1pb1/2n3p1/3pP2p/P2P1N2/7P/1P3PP1/R1BQR1K1 b - - 0 16", + "r2n1rk1/pppq1pb1/6p1/3pP2p/P2P1N2/R6P/1P3PP1/2BQR1K1 b - - 2 17", + "r4rk1/pppq1pb1/4N1p1/3pP2p/P2P4/R6P/1P3PP1/2BQR1K1 b - - 0 18", + "r4rk1/ppp2pb1/4q1p1/3pP1Bp/P2P4/R6P/1P3PP1/3QR1K1 b - - 1 19", + "r3r1k1/ppp2pb1/4q1p1/3pP1Bp/P2P1P2/R6P/1P4P1/3QR1K1 b - - 0 20", + "r3r1k1/ppp3b1/4qpp1/3pP2p/P2P1P1B/R6P/1P4P1/3QR1K1 b - - 1 21", + "r3r1k1/ppp3b1/4q1p1/3pP2p/P4P1B/R6P/1P4P1/3QR1K1 b - - 0 22", + "r4rk1/ppp3b1/4q1p1/3pP1Bp/P4P2/R6P/1P4P1/3QR1K1 b - - 2 23", + "r4rk1/pp4b1/4q1p1/2ppP1Bp/P4P2/3R3P/1P4P1/3QR1K1 b - - 1 24", + "r4rk1/pp4b1/4q1p1/2p1P1Bp/P2p1PP1/3R3P/1P6/3QR1K1 b - - 0 25", + "r4rk1/pp4b1/4q1p1/2p1P1B1/P2p1PP1/3R4/1P6/3QR1K1 b - - 0 26", + "r5k1/pp3rb1/4q1p1/2p1P1B1/P2p1PP1/6R1/1P6/3QR1K1 b - - 2 27", + "5rk1/pp3rb1/4q1p1/2p1P1B1/P2pRPP1/6R1/1P6/3Q2K1 b - - 4 28", + "5rk1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/6R1/1P6/3Q2K1 b - - 0 29", + "4r1k1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 30", + 
"4r1k1/5rb1/pP2q1p1/2p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 31", + "4r1k1/5rb1/pq4p1/2p1P1B1/3pRPP1/1P4R1/4Q3/6K1 b - - 1 32", + "4r1k1/1r4b1/pq4p1/2p1P1B1/3pRPP1/1P4R1/2Q5/6K1 b - - 3 33", + "4r1k1/1r4b1/1q4p1/p1p1P1B1/3p1PP1/1P4R1/2Q5/4R1K1 b - - 1 34", + "4r1k1/3r2b1/1q4p1/p1p1P1B1/2Qp1PP1/1P4R1/8/4R1K1 b - - 3 35", + "4r1k1/3r2b1/4q1p1/p1p1P1B1/2Qp1PP1/1P4R1/5K2/4R3 b - - 5 36", + "4r1k1/3r2b1/6p1/p1p1P1B1/2Pp1PP1/6R1/5K2/4R3 b - - 0 37", + "4r1k1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/5K2/3R4 b - - 1 38", + "5rk1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/8/3RK3 b - - 3 39", + "5rk1/6b1/6p1/p1p1P1B1/2Pr1PP1/3R4/8/3RK3 b - - 0 40", + "5rk1/3R2b1/6p1/p1p1P1B1/2r2PP1/8/8/3RK3 b - - 1 41", + "5rk1/3R2b1/6p1/p1p1P1B1/4rPP1/8/3K4/3R4 b - - 3 42", + "1r4k1/3R2b1/6p1/p1p1P1B1/4rPP1/2K5/8/3R4 b - - 5 43", + "1r4k1/3R2b1/6p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 7 44", + "1r3bk1/8/3R2p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 9 45", + "1r3bk1/8/6R1/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 0 46", + "1r3b2/5k2/R7/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 2 47", + "5b2/1r3k2/R7/2p1P1B1/p1K2PP1/4r3/8/7R b - - 4 48", + "5b2/5k2/R7/2pKP1B1/pr3PP1/4r3/8/7R b - - 6 49", + "5b2/5k2/R1K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 8 50", + "8/R4kb1/2K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 10 51", + "8/R5b1/2K3k1/2p1PPB1/p2r2P1/4r3/8/7R b - - 0 52", + "8/6R1/2K5/2p1PPk1/p2r2P1/4r3/8/7R b - - 0 53", + "8/6R1/2K5/2p1PP2/p2r1kP1/4r3/8/5R2 b - - 2 54", + "8/6R1/2K2P2/2p1P3/p2r2P1/4r1k1/8/5R2 b - - 0 55", + "8/5PR1/2K5/2p1P3/p2r2P1/4r3/6k1/5R2 b - - 0 56", + }, + { + "rn1qkb1r/p1pbpppp/5n2/8/2pP4/2N5/1PQ1PPPP/R1B1KBNR w KQkq - 0 7", + "r2qkb1r/p1pbpppp/2n2n2/8/2pP4/2N2N2/1PQ1PPPP/R1B1KB1R w KQkq - 2 8", + "r2qkb1r/p1pbpppp/5n2/8/1npPP3/2N2N2/1PQ2PPP/R1B1KB1R w KQkq - 1 9", + "r2qkb1r/p1pb1ppp/4pn2/8/1npPP3/2N2N2/1P3PPP/R1BQKB1R w KQkq - 0 10", + "r2qk2r/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQK2R w KQkq - 1 11", + "r2q1rk1/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQ1RK1 w - - 3 12", + "r2q1rk1/2pbbppp/p3pn2/8/1nBPPB2/2N2N2/1P3PPP/R2Q1RK1 w - - 0 13", + 
"r2q1rk1/2p1bppp/p3pn2/1b6/1nBPPB2/2N2N2/1P3PPP/R2QR1K1 w - - 2 14", + "r2q1rk1/4bppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/5PPP/R2QR1K1 w - - 0 15", + "r4rk1/3qbppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/3Q1PPP/R3R1K1 w - - 2 16", + "r4rk1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/3Q1PP1/R3R1K1 w - - 1 17", + "r3r1k1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/4QPP1/R3R1K1 w - - 3 18", + "r3r1k1/1q1nbppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/4QPP1/3RR1K1 w - - 5 19", + "r3rbk1/1q1n1ppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R1K1 w - - 7 20", + "r3rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R2K w - - 9 21", + "2r1rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/1R5K w - - 11 22", + "2r1rbk1/1q4pp/pnp1pp2/1b6/1nBPPB2/1PN2N1P/4QPP1/1R1R3K w - - 0 23", + "2r1rbk1/5qpp/pnp1pp2/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R3K w - - 2 24", + "2r1rbk1/5qp1/pnp1pp1p/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R2K1 w - - 0 25", + "2r1rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/n3QPP1/1R1R2K1 w - - 0 26", + "r3rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/1R1R2K1 w - - 1 27", + "rr3bk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/R2R2K1 w - - 3 28", + "rr2qbk1/6p1/pnp1pp1p/1b6/2BPP3/1P2BN1P/4QPP1/R2R2K1 w - - 5 29", + "rr2qbk1/6p1/1np1pp1p/pb6/2BPP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 30", + "rr2qbk1/6p1/1n2pp1p/pp6/3PP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 31", + "rr2qbk1/6p1/1n2pp1p/1p1P4/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 0 32", + "rr2qbk1/3n2p1/3Ppp1p/1p6/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 1 33", + "rr3bk1/3n2p1/3Ppp1p/1p5q/pP2P3/3QBN1P/5PP1/R2R2K1 w - - 1 34", + "rr3bk1/3n2p1/3Ppp1p/1p5q/1P2P3/p2QBN1P/5PP1/2RR2K1 w - - 0 35", + "1r3bk1/3n2p1/r2Ppp1p/1p5q/1P2P3/pQ2BN1P/5PP1/2RR2K1 w - - 2 36", + "1r2qbk1/2Rn2p1/r2Ppp1p/1p6/1P2P3/pQ2BN1P/5PP1/3R2K1 w - - 4 37", + "1r2qbk1/2Rn2p1/r2Ppp1p/1pB5/1P2P3/1Q3N1P/p4PP1/3R2K1 w - - 0 38", + "1r2q1k1/2Rn2p1/r2bpp1p/1pB5/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 39", + "1r2q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 40", + "2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 1 41", + "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 3 42", + 
"2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 5 43", + "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 7 44", + "1rq3k1/R2n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 9 45", + "2q3k1/Rr1n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 11 46", + "Rrq3k1/3n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 13 47", + }, + { + "rn1qkb1r/1pp2ppp/p4p2/3p1b2/5P2/1P2PN2/P1PP2PP/RN1QKB1R b KQkq - 1 6", + "r2qkb1r/1pp2ppp/p1n2p2/3p1b2/3P1P2/1P2PN2/P1P3PP/RN1QKB1R b KQkq - 0 7", + "r2qkb1r/1pp2ppp/p4p2/3p1b2/1n1P1P2/1P1BPN2/P1P3PP/RN1QK2R b KQkq - 2 8", + "r2qkb1r/1pp2ppp/p4p2/3p1b2/3P1P2/1P1PPN2/P5PP/RN1QK2R b KQkq - 0 9", + "r2qk2r/1pp2ppp/p2b1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2QK2R b KQkq - 2 10", + "r2qk2r/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2Q1RK1 b kq - 1 11", + "r2q1rk1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P2Q2PP/R4RK1 b - - 3 12", + "r2qr1k1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1P1PPN2/P2QN1PP/R4RK1 b - - 5 13", + "r3r1k1/1p3ppp/pqpb1p2/3p1b2/3P1P2/1P1PPNN1/P2Q2PP/R4RK1 b - - 7 14", + "r3r1k1/1p3ppp/pqp2p2/3p1b2/1b1P1P2/1P1PPNN1/P1Q3PP/R4RK1 b - - 9 15", + "r3r1k1/1p1b1ppp/pqp2p2/3p4/1b1P1P2/1P1PPNN1/P4QPP/R4RK1 b - - 11 16", + "2r1r1k1/1p1b1ppp/pqp2p2/3p4/1b1PPP2/1P1P1NN1/P4QPP/R4RK1 b - - 0 17", + "2r1r1k1/1p1b1ppp/pq3p2/2pp4/1b1PPP2/PP1P1NN1/5QPP/R4RK1 b - - 0 18", + "2r1r1k1/1p1b1ppp/pq3p2/2Pp4/4PP2/PPbP1NN1/5QPP/R4RK1 b - - 0 19", + "2r1r1k1/1p1b1ppp/p4p2/2Pp4/4PP2/PqbP1NN1/5QPP/RR4K1 b - - 1 20", + "2r1r1k1/1p1b1ppp/p4p2/2Pp4/q3PP2/P1bP1NN1/R4QPP/1R4K1 b - - 3 21", + "2r1r1k1/1p3ppp/p4p2/1bPP4/q4P2/P1bP1NN1/R4QPP/1R4K1 b - - 0 22", + "2r1r1k1/1p3ppp/p4p2/2PP4/q4P2/P1bb1NN1/R4QPP/2R3K1 b - - 1 23", + "2r1r1k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R3K1 b - - 0 24", + "2rr2k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R4K b - - 2 25", + "2rr2k1/1p3ppp/p2P1p2/2Q5/5P2/P1bb1NN1/R5PP/2R4K b - - 0 26", + "3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1bb1N2/R3N1PP/2R4K b - - 1 27", + "3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1b2N2/4R1PP/2R4K b - - 0 28", + "3r2k1/1p3ppp/p2P1p2/2r5/1b3P2/P4N2/4R1PP/3R3K b - - 
2 29", + "3r2k1/1p2Rppp/p2P1p2/b1r5/5P2/P4N2/6PP/3R3K b - - 4 30", + "3r2k1/1R3ppp/p1rP1p2/b7/5P2/P4N2/6PP/3R3K b - - 0 31", + "3r2k1/1R3ppp/p2R1p2/b7/5P2/P4N2/6PP/7K b - - 0 32", + "6k1/1R3ppp/p2r1p2/b7/5P2/P4NP1/7P/7K b - - 0 33", + "6k1/1R3p1p/p2r1pp1/b7/5P1P/P4NP1/8/7K b - - 0 34", + "6k1/3R1p1p/pr3pp1/b7/5P1P/P4NP1/8/7K b - - 2 35", + "6k1/5p2/pr3pp1/b2R3p/5P1P/P4NP1/8/7K b - - 1 36", + "6k1/5p2/pr3pp1/7p/5P1P/P1bR1NP1/8/7K b - - 3 37", + "6k1/5p2/p1r2pp1/7p/5P1P/P1bR1NP1/6K1/8 b - - 5 38", + "6k1/5p2/p1r2pp1/b2R3p/5P1P/P4NP1/6K1/8 b - - 7 39", + "6k1/5p2/p4pp1/b2R3p/5P1P/P4NPK/2r5/8 b - - 9 40", + "6k1/2b2p2/p4pp1/7p/5P1P/P2R1NPK/2r5/8 b - - 11 41", + "6k1/2b2p2/5pp1/p6p/3N1P1P/P2R2PK/2r5/8 b - - 1 42", + "6k1/2b2p2/5pp1/p6p/3N1P1P/P1R3PK/r7/8 b - - 3 43", + "6k1/5p2/1b3pp1/p6p/5P1P/P1R3PK/r1N5/8 b - - 5 44", + "8/5pk1/1bR2pp1/p6p/5P1P/P5PK/r1N5/8 b - - 7 45", + "3b4/5pk1/2R2pp1/p4P1p/7P/P5PK/r1N5/8 b - - 0 46", + "8/4bpk1/2R2pp1/p4P1p/6PP/P6K/r1N5/8 b - - 0 47", + "8/5pk1/2R2pP1/p6p/6PP/b6K/r1N5/8 b - - 0 48", + "8/6k1/2R2pp1/p6P/7P/b6K/r1N5/8 b - - 0 49", + "8/6k1/2R2p2/p6p/7P/b5K1/r1N5/8 b - - 1 50", + "8/8/2R2pk1/p6p/7P/b4K2/r1N5/8 b - - 3 51", + "8/8/2R2pk1/p6p/7P/4NK2/rb6/8 b - - 5 52", + "2R5/8/5pk1/7p/p6P/4NK2/rb6/8 b - - 1 53", + "6R1/8/5pk1/7p/p6P/4NK2/1b6/r7 b - - 3 54", + "R7/5k2/5p2/7p/p6P/4NK2/1b6/r7 b - - 5 55", + "R7/5k2/5p2/7p/7P/p3N3/1b2K3/r7 b - - 1 56", + "8/R4k2/5p2/7p/7P/p3N3/1b2K3/7r b - - 3 57", + "8/8/5pk1/7p/R6P/p3N3/1b2K3/7r b - - 5 58", + "8/8/5pk1/7p/R6P/p7/4K3/2bN3r b - - 7 59", + "8/8/5pk1/7p/R6P/p7/4KN1r/2b5 b - - 9 60", + "8/8/5pk1/7p/R6P/p3K3/1b3N1r/8 b - - 11 61", + "8/8/R4pk1/7p/7P/p1b1K3/5N1r/8 b - - 13 62", + "8/8/5pk1/7p/7P/2b1K3/R4N1r/8 b - - 0 63", + "8/8/5pk1/7p/3K3P/8/R4N1r/4b3 b - - 2 64", + } +}; +// clang-format on + } // namespace namespace Stockfish::Benchmark { @@ -160,4 +437,76 @@ std::vector setup_bench(const std::string& currentFen, std::istream return list; } +BenchmarkSetup setup_benchmark(std::istream& 
is) { + // TT_SIZE_PER_THREAD is chosen such that roughly half of the hash is used after all positions + // for the current sequence have been searched. + static constexpr int TT_SIZE_PER_THREAD = 128; + + static constexpr int DEFAULT_DURATION_S = 150; + + BenchmarkSetup setup{}; + + // Assign default values to missing arguments + int desiredTimeS; + + if (!(is >> setup.threads)) + setup.threads = get_hardware_concurrency(); + else + setup.originalInvocation += std::to_string(setup.threads); + + if (!(is >> setup.ttSize)) + setup.ttSize = TT_SIZE_PER_THREAD * setup.threads; + else + setup.originalInvocation += " " + std::to_string(setup.ttSize); + + if (!(is >> desiredTimeS)) + desiredTimeS = DEFAULT_DURATION_S; + else + setup.originalInvocation += " " + std::to_string(desiredTimeS); + + setup.filledInvocation += std::to_string(setup.threads) + " " + std::to_string(setup.ttSize) + + " " + std::to_string(desiredTimeS); + + auto getCorrectedTime = [&](int ply) { + // time per move is fit roughly based on LTC games + // seconds = 50/{ply+15} + // ms = 50000/{ply+15} + // with this fit 10th move gets 2000ms + // adjust for desired 10th move time + return 50000.0 / (static_cast(ply) + 15.0); + }; + + float totalTime = 0; + for (const auto& game : BenchmarkPositions) + { + // Timing pass only: commands are queued in the second loop below. + int ply = 1; + for (int i = 0; i < static_cast(game.size()); ++i) + { + const float correctedTime = getCorrectedTime(ply); + totalTime += correctedTime; + ply += 1; + } + } + + float timeScaleFactor = static_cast(desiredTimeS * 1000) / totalTime; + + for (const auto& game : BenchmarkPositions) + { + setup.commands.emplace_back("ucinewgame"); + int ply = 1; + for (const std::string& fen : game) + { + setup.commands.emplace_back("position fen " + fen); + + const int correctedTime = static_cast(getCorrectedTime(ply) * timeScaleFactor); + setup.commands.emplace_back("go movetime " + std::to_string(correctedTime)); + + ply += 1; + } + } + + return setup; +} + } //
namespace Stockfish \ No newline at end of file diff --git a/src/benchmark.h b/src/benchmark.h index b1eba40f..eb3a52d8 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -27,6 +27,16 @@ namespace Stockfish::Benchmark { std::vector setup_bench(const std::string&, std::istream&); +struct BenchmarkSetup { + int ttSize; + int threads; + std::vector commands; + std::string originalInvocation; + std::string filledInvocation; +}; + +BenchmarkSetup setup_benchmark(std::istream&); + } // namespace Stockfish #endif // #ifndef BENCHMARK_H_INCLUDED diff --git a/src/engine.cpp b/src/engine.cpp index b5cc3f83..85c84099 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -67,12 +67,13 @@ Engine::Engine(std::optional path) : options["NumaPolicy"] << Option("auto", [this](const Option& o) { set_numa_config_from_option(o); - return numa_config_information_as_string() + "\n" + thread_binding_information_as_string(); + return numa_config_information_as_string() + "\n" + + thread_allocation_information_as_string(); }); options["Threads"] << Option(1, 1, 1024, [this](const Option&) { resize_threads(); - return thread_binding_information_as_string(); + return thread_allocation_information_as_string(); }); options["Hash"] << Option(16, 1, MaxHashMB, [this](const Option& o) { @@ -156,6 +157,10 @@ void Engine::set_on_bestmove(std::function&& f) { + onVerifyNetworks = std::move(f); +} + void Engine::wait_for_search_finished() { threads.main_thread()->wait_for_search_finished(); } void Engine::set_position(const std::string& fen, const std::vector& moves) { @@ -226,8 +231,8 @@ void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; } // network related void Engine::verify_networks() const { - networks->big.verify(options["EvalFile"]); - networks->small.verify(options["EvalFileSmall"]); + networks->big.verify(options["EvalFile"], onVerifyNetworks); + networks->small.verify(options["EvalFileSmall"], onVerifyNetworks); } void Engine::load_networks() { @@ -285,6 +290,8 @@ 
std::string Engine::visualize() const { return ss.str(); } +int Engine::get_hashfull(int maxAge) const { return tt.hashfull(maxAge); } + std::vector> Engine::get_bound_thread_count_by_numa_node() const { auto counts = threads.get_bound_thread_count_by_numa_node(); const NumaConfig& cfg = numaContext.get_numa_config(); @@ -310,15 +317,9 @@ std::string Engine::numa_config_information_as_string() const { std::string Engine::thread_binding_information_as_string() const { auto boundThreadsByNode = get_bound_thread_count_by_numa_node(); std::stringstream ss; - - size_t threadsSize = threads.size(); - ss << "Using " << threadsSize << (threadsSize > 1 ? " threads" : " thread"); - if (boundThreadsByNode.empty()) return ss.str(); - ss << " with NUMA node thread binding: "; - bool isFirst = true; for (auto&& [current, total] : boundThreadsByNode) @@ -332,4 +333,20 @@ std::string Engine::thread_binding_information_as_string() const { return ss.str(); } +std::string Engine::thread_allocation_information_as_string() const { + std::stringstream ss; + + size_t threadsSize = threads.size(); + ss << "Using " << threadsSize << (threadsSize > 1 ? 
" threads" : " thread"); + + auto boundThreadsByNodeStr = thread_binding_information_as_string(); + if (boundThreadsByNodeStr.empty()) + return ss.str(); + + ss << " with NUMA node thread binding: "; + ss << boundThreadsByNodeStr; + + return ss.str(); +} + } diff --git a/src/engine.h b/src/engine.h index efab1c6a..25782693 100644 --- a/src/engine.h +++ b/src/engine.h @@ -81,6 +81,7 @@ class Engine { void set_on_update_full(std::function&&); void set_on_iter(std::function&&); void set_on_bestmove(std::function&&); + void set_on_verify_networks(std::function&&); // network related @@ -97,12 +98,15 @@ class Engine { const OptionsMap& get_options() const; OptionsMap& get_options(); + int get_hashfull(int maxAge = 0) const; + std::string fen() const; void flip(); std::string visualize() const; std::vector> get_bound_thread_count_by_numa_node() const; std::string get_numa_config_as_string() const; std::string numa_config_information_as_string() const; + std::string thread_allocation_information_as_string() const; std::string thread_binding_information_as_string() const; private: @@ -119,7 +123,8 @@ class Engine { TranspositionTable tt; LazyNumaReplicated networks; - Search::SearchManager::UpdateContext updateContext; + Search::SearchManager::UpdateContext updateContext; + std::function onVerifyNetworks; }; } // namespace Stockfish diff --git a/src/memory.cpp b/src/memory.cpp index ae303c53..47c901b4 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -212,6 +212,37 @@ void* aligned_large_pages_alloc(size_t allocSize) { #endif +bool has_large_pages() { + +#if defined(_WIN32) + + constexpr size_t page_size = 2 * 1024 * 1024; // 2MB page size assumed + void* mem = aligned_large_pages_alloc_windows(page_size); + if (mem == nullptr) + { + return false; + } + else + { + aligned_large_pages_free(mem); + return true; + } + +#elif defined(__linux__) + + #if defined(MADV_HUGEPAGE) + return true; + #else + return false; + #endif + +#else + + return false; + +#endif +} + // 
aligned_large_pages_free() will free the previously memory allocated // by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr. diff --git a/src/memory.h b/src/memory.h index 3155a5aa..eaf0261a 100644 --- a/src/memory.h +++ b/src/memory.h @@ -38,6 +38,8 @@ void std_aligned_free(void* ptr); void* aligned_large_pages_alloc(size_t size); void aligned_large_pages_free(void* mem); +bool has_large_pages(); + // Frees memory which was placed there with placement new. // Works for both single objects and arrays of unknown bound. template diff --git a/src/misc.cpp b/src/misc.cpp index 664ab4b8..10c86b7a 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -122,7 +122,7 @@ class Logger { // // For releases (non-dev builds) we only include the version number: // Stockfish version -std::string engine_info(bool to_uci) { +std::string engine_version_info() { std::stringstream ss; ss << "Stockfish " << version << std::setfill('0'); @@ -151,11 +151,14 @@ std::string engine_info(bool to_uci) { #endif } - ss << (to_uci ? "\nid author " : " by ") << "the Stockfish developers (see AUTHORS file)"; - return ss.str(); } +std::string engine_info(bool to_uci) { + return engine_version_info() + (to_uci ? 
"\nid author " : " by ") + + "the Stockfish developers (see AUTHORS file)"; +} + // Returns a string trying to describe the compiler we use std::string compiler_info() { @@ -451,7 +454,7 @@ void remove_whitespace(std::string& s) { s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return std::isspace(c); }), s.end()); } -bool is_whitespace(const std::string& s) { +bool is_whitespace(std::string_view s) { return std::all_of(s.begin(), s.end(), [](char c) { return std::isspace(c); }); } diff --git a/src/misc.h b/src/misc.h index ce49a1f6..21093769 100644 --- a/src/misc.h +++ b/src/misc.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #define stringify2(x) #x @@ -35,6 +36,7 @@ namespace Stockfish { +std::string engine_version_info(); std::string engine_info(bool to_uci = false); std::string compiler_info(); @@ -79,8 +81,8 @@ inline TimePoint now() { .count(); } -inline std::vector split(const std::string& s, const std::string& delimiter) { - std::vector res; +inline std::vector split(std::string_view s, std::string_view delimiter) { + std::vector res; if (s.empty()) return res; @@ -102,7 +104,7 @@ inline std::vector split(const std::string& s, const std::string& d } void remove_whitespace(std::string& s); -bool is_whitespace(const std::string& s); +bool is_whitespace(std::string_view s); enum SyncCout { IO_LOCK, diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp index f7d2cc6a..a8e901a0 100644 --- a/src/nnue/network.cpp +++ b/src/nnue/network.cpp @@ -234,35 +234,44 @@ Network::evaluate(const Position& pos template -void Network::verify(std::string evalfilePath) const { +void Network::verify(std::string evalfilePath, + const std::function& f) const { if (evalfilePath.empty()) evalfilePath = evalFile.defaultName; if (evalFile.current != evalfilePath) { - std::string msg1 = - "Network evaluation parameters compatible with the engine must be available."; - std::string msg2 = "The network file " + evalfilePath + " was not loaded successfully."; 
- std::string msg3 = "The UCI option EvalFile might need to specify the full path, " - "including the directory name, to the network file."; - std::string msg4 = "The default net can be downloaded from: " - "https://tests.stockfishchess.org/api/nn/" - + evalFile.defaultName; - std::string msg5 = "The engine will be terminated now."; + if (f) + { + std::string msg1 = + "Network evaluation parameters compatible with the engine must be available."; + std::string msg2 = "The network file " + evalfilePath + " was not loaded successfully."; + std::string msg3 = "The UCI option EvalFile might need to specify the full path, " + "including the directory name, to the network file."; + std::string msg4 = "The default net can be downloaded from: " + "https://tests.stockfishchess.org/api/nn/" + + evalFile.defaultName; + std::string msg5 = "The engine will be terminated now."; + + std::string msg = "ERROR: " + msg1 + '\n' + "ERROR: " + msg2 + '\n' + "ERROR: " + msg3 + + '\n' + "ERROR: " + msg4 + '\n' + "ERROR: " + msg5 + '\n'; + + f(msg); + } - sync_cout << "info string ERROR: " << msg1 << sync_endl; - sync_cout << "info string ERROR: " << msg2 << sync_endl; - sync_cout << "info string ERROR: " << msg3 << sync_endl; - sync_cout << "info string ERROR: " << msg4 << sync_endl; - sync_cout << "info string ERROR: " << msg5 << sync_endl; exit(EXIT_FAILURE); } - size_t size = sizeof(*featureTransformer) + sizeof(Arch) * LayerStacks; - sync_cout << "info string NNUE evaluation using " << evalfilePath << " (" - << size / (1024 * 1024) << "MiB, (" << featureTransformer->InputDimensions << ", " - << network[0].TransformedFeatureDimensions << ", " << network[0].FC_0_OUTPUTS << ", " - << network[0].FC_1_OUTPUTS << ", 1))" << sync_endl; + if (f) + { + size_t size = sizeof(*featureTransformer) + sizeof(Arch) * LayerStacks; + f("info string NNUE evaluation using " + evalfilePath + " (" + + std::to_string(size / (1024 * 1024)) + "MiB, (" + + std::to_string(featureTransformer->InputDimensions) + 
", " + + std::to_string(network[0].TransformedFeatureDimensions) + ", " + + std::to_string(network[0].FC_0_OUTPUTS) + ", " + std::to_string(network[0].FC_1_OUTPUTS) + + ", 1))"); + } } diff --git a/src/nnue/network.h b/src/nnue/network.h index 15208255..95253595 100644 --- a/src/nnue/network.h +++ b/src/nnue/network.h @@ -20,9 +20,11 @@ #define NETWORK_H_INCLUDED #include +#include #include #include #include +#include #include #include @@ -68,7 +70,7 @@ class Network { void hint_common_access(const Position& pos, AccumulatorCaches::Cache* cache) const; - void verify(std::string evalfilePath) const; + void verify(std::string evalfilePath, const std::function&) const; NnueEvalTrace trace_evaluate(const Position& pos, AccumulatorCaches::Cache* cache) const; diff --git a/src/numa.h b/src/numa.h index db835922..1063721e 100644 --- a/src/numa.h +++ b/src/numa.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -653,7 +654,7 @@ class NumaConfig { NumaIndex n = 0; for (auto&& nodeStr : split(s, ":")) { - auto indices = indices_from_shortened_string(nodeStr); + auto indices = indices_from_shortened_string(std::string(nodeStr)); if (!indices.empty()) { for (auto idx : indices) @@ -1015,7 +1016,7 @@ class NumaConfig { if (s.empty()) return indices; - for (const std::string& ss : split(s, ",")) + for (const auto& ss : split(s, ",")) { if (ss.empty()) continue; @@ -1023,13 +1024,13 @@ class NumaConfig { auto parts = split(ss, "-"); if (parts.size() == 1) { - const CpuIndex c = CpuIndex{str_to_size_t(parts[0])}; + const CpuIndex c = CpuIndex{str_to_size_t(std::string(parts[0]))}; indices.emplace_back(c); } else if (parts.size() == 2) { - const CpuIndex cfirst = CpuIndex{str_to_size_t(parts[0])}; - const CpuIndex clast = CpuIndex{str_to_size_t(parts[1])}; + const CpuIndex cfirst = CpuIndex{str_to_size_t(std::string(parts[0]))}; + const CpuIndex clast = CpuIndex{str_to_size_t(std::string(parts[1]))}; for (size_t c = cfirst; c <= clast; ++c) { 
indices.emplace_back(c); diff --git a/src/tt.cpp b/src/tt.cpp index 4b55e53f..50750753 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -193,13 +193,20 @@ void TranspositionTable::clear(ThreadPool& threads) { // Returns an approximation of the hashtable // occupation during a search. The hash is x permill full, as per UCI protocol. // Only counts entries which match the current generation. -int TranspositionTable::hashfull() const { - +int TranspositionTable::hashfull(int maxAge) const { int cnt = 0; for (int i = 0; i < 1000; ++i) for (int j = 0; j < ClusterSize; ++j) - cnt += table[i].entry[j].is_occupied() - && (table[i].entry[j].genBound8 & GENERATION_MASK) == generation8; + { + if (table[i].entry[j].is_occupied()) + { + int age = (generation8 >> GENERATION_BITS) + - ((table[i].entry[j].genBound8 & GENERATION_MASK) >> GENERATION_BITS); + if (age < 0) + age += 1 << (8 - GENERATION_BITS); + cnt += age <= maxAge; + } + } return cnt / ClusterSize; } diff --git a/src/tt.h b/src/tt.h index 1bece002..e7bb5c45 100644 --- a/src/tt.h +++ b/src/tt.h @@ -73,7 +73,7 @@ class TranspositionTable { void resize(size_t mbSize, ThreadPool& threads); // Set TT size void clear(ThreadPool& threads); // Re-initialize memory, multithreaded - int hashfull() + int hashfull(int maxAge = 0) const; // Approximate what fraction of entries (permille) have been written to during this root search void diff --git a/src/uci.cpp b/src/uci.cpp index c94f8b91..cfb34db7 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include "benchmark.h" #include "engine.h" +#include "memory.h" #include "movegen.h" #include "position.h" #include "score.h" @@ -39,6 +41,8 @@ namespace Stockfish { +constexpr auto BenchmarkCommand = "speedtest"; + constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; template struct overload: Ts... { @@ -48,7 +52,7 @@ struct overload: Ts... 
{ template overload(Ts...) -> overload; -void UCIEngine::print_info_string(const std::string& str) { +void UCIEngine::print_info_string(std::string_view str) { sync_cout_start(); for (auto& line : split(str, "\n")) { @@ -69,11 +73,16 @@ UCIEngine::UCIEngine(int argc, char** argv) : print_info_string(*str); }); + init_search_update_listeners(); +} + +void UCIEngine::init_search_update_listeners() { engine.set_on_iter([](const auto& i) { on_iter(i); }); engine.set_on_update_no_moves([](const auto& i) { on_update_no_moves(i); }); engine.set_on_update_full( [this](const auto& i) { on_update_full(i, engine.get_options()["UCI_ShowWDL"]); }); engine.set_on_bestmove([](const auto& bm, const auto& p) { on_bestmove(bm, p); }); + engine.set_on_verify_networks([](const auto& s) { print_info_string(s); }); } void UCIEngine::loop() { @@ -117,7 +126,7 @@ void UCIEngine::loop() { { // send info strings after the go command is sent for old GUIs and python-chess print_info_string(engine.numa_config_information_as_string()); - print_info_string(engine.thread_binding_information_as_string()); + print_info_string(engine.thread_allocation_information_as_string()); go(is); } else if (token == "position") @@ -133,6 +142,8 @@ void UCIEngine::loop() { engine.flip(); else if (token == "bench") bench(is); + else if (token == BenchmarkCommand) + benchmark(is); else if (token == "d") sync_cout << engine.visualize() << sync_endl; else if (token == "eval") @@ -285,6 +296,165 @@ void UCIEngine::bench(std::istream& args) { engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); } +void UCIEngine::benchmark(std::istream& args) { + // Probably not very important for a test this long, but include for completeness and sanity. 
+ static constexpr int NUM_WARMUP_POSITIONS = 3; + + std::string token; + uint64_t nodes = 0, cnt = 1; + uint64_t nodesSearched = 0; + + engine.set_on_update_full([&](const Engine::InfoFull& i) { nodesSearched = i.nodes; }); + + engine.set_on_iter([](const auto&) {}); + engine.set_on_update_no_moves([](const auto&) {}); + engine.set_on_bestmove([](const auto&, const auto&) {}); + engine.set_on_verify_networks([](const auto&) {}); + + Benchmark::BenchmarkSetup setup = Benchmark::setup_benchmark(args); + + const int numGoCommands = count_if(setup.commands.begin(), setup.commands.end(), + [](const std::string& s) { return s.find("go ") == 0; }); + + TimePoint totalTime = 0; + + // Set options once at the start. + auto ss = std::istringstream("name Threads value " + std::to_string(setup.threads)); + setoption(ss); + ss = std::istringstream("name Hash value " + std::to_string(setup.ttSize)); + setoption(ss); + ss = std::istringstream("name UCI_Chess960 value false"); + setoption(ss); + + // Warmup + for (const auto& cmd : setup.commands) + { + std::istringstream is(cmd); + is >> std::skipws >> token; + + if (token == "go") + { + // One new line is produced by the search, so omit it here + std::cerr << "\rWarmup position " << cnt++ << '/' << NUM_WARMUP_POSITIONS; + + Search::LimitsType limits = parse_limits(is); + + TimePoint elapsed = now(); + + // Run with silenced network verification + engine.go(limits); + engine.wait_for_search_finished(); + + totalTime += now() - elapsed; + + nodes += nodesSearched; + nodesSearched = 0; + } + else if (token == "position") + position(is); + else if (token == "ucinewgame") + { + engine.search_clear(); // search_clear may take a while + } + + if (cnt > NUM_WARMUP_POSITIONS) + break; + } + + std::cerr << "\n"; + + cnt = 1; + nodes = 0; + + int numHashfullReadings = 0; + constexpr int hashfullAges[] = {0, 999}; // Only normal hashfull and touched hash. 
+ int totalHashfull[std::size(hashfullAges)] = {0}; + int maxHashfull[std::size(hashfullAges)] = {0}; + + auto updateHashfullReadings = [&]() { + numHashfullReadings += 1; + + for (int i = 0; i < static_cast(std::size(hashfullAges)); ++i) + { + const int hashfull = engine.get_hashfull(hashfullAges[i]); + maxHashfull[i] = std::max(maxHashfull[i], hashfull); + totalHashfull[i] += hashfull; + } + }; + + engine.search_clear(); // search_clear may take a while + + for (const auto& cmd : setup.commands) + { + std::istringstream is(cmd); + is >> std::skipws >> token; + + if (token == "go") + { + // One new line is produced by the search, so omit it here + std::cerr << "\rPosition " << cnt++ << '/' << numGoCommands; + + Search::LimitsType limits = parse_limits(is); + + TimePoint elapsed = now(); + + // Run with silenced network verification + engine.go(limits); + engine.wait_for_search_finished(); + + totalTime += now() - elapsed; + + updateHashfullReadings(); + + nodes += nodesSearched; + nodesSearched = 0; + } + else if (token == "position") + position(is); + else if (token == "ucinewgame") + { + engine.search_clear(); // search_clear may take a while + } + } + + totalTime = std::max(totalTime, 1); // Ensure positivity to avoid a 'divide by zero' + + dbg_print(); + + std::cerr << "\n"; + + static_assert( + std::size(hashfullAges) == 2 && hashfullAges[0] == 0 && hashfullAges[1] == 999, + "Hardcoded for display. Would complicate the code needlessly in the current state."); + + std::string threadBinding = engine.thread_binding_information_as_string(); + if (threadBinding.empty()) + threadBinding = "none"; + + std::cerr << "===========================" + << "\nVersion : " + << engine_version_info() + // "\nCompiled by : " + << compiler_info() + << "Large pages : " << (has_large_pages() ? 
"yes" : "no") + << "\nUser invocation : " << BenchmarkCommand << " " + << setup.originalInvocation << "\nFilled invocation : " << BenchmarkCommand + << " " << setup.filledInvocation + << "\nAvailable processors : " << engine.get_numa_config_as_string() + << "\nThread count : " << setup.threads + << "\nThread binding : " << threadBinding + << "\nTT size [MiB] : " << setup.ttSize + << "\nHash max, avg [per mille] : " + << "\n single search : " << maxHashfull[0] << ", " + << totalHashfull[0] / numHashfullReadings + << "\n single game : " << maxHashfull[1] << ", " + << totalHashfull[1] / numHashfullReadings + << "\nTotal nodes searched : " << nodes + << "\nTotal search time [s] : " << totalTime / 1000.0 + << "\nNodes/second : " << 1000 * nodes / totalTime << std::endl; + + init_search_update_listeners(); +} void UCIEngine::setoption(std::istringstream& is) { engine.wait_for_search_finished(); diff --git a/src/uci.h b/src/uci.h index 23745f96..6adf74cb 100644 --- a/src/uci.h +++ b/src/uci.h @@ -58,10 +58,11 @@ class UCIEngine { Engine engine; CommandLine cli; - static void print_info_string(const std::string& str); + static void print_info_string(std::string_view str); void go(std::istringstream& is); void bench(std::istream& args); + void benchmark(std::istream& args); void position(std::istringstream& is); void setoption(std::istringstream& is); std::uint64_t perft(const Search::LimitsType&); @@ -70,6 +71,8 @@ class UCIEngine { static void on_update_full(const Engine::InfoFull& info, bool showWDL); static void on_iter(const Engine::InfoIter& info); static void on_bestmove(std::string_view bestmove, std::string_view ponder); + + void init_search_update_listeners(); }; } // namespace Stockfish From 56444ce1f7e2204d69c35f5826f74130adc77b2c Mon Sep 17 00:00:00 2001 From: peregrineshahin <41402573+peregrineshahin@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:09:03 +0300 Subject: [PATCH 257/315] Push expected cutting late moves up in the move ordering. 
since the passing of the LMR verification is coming from a relatively late move this means we have wasted some time trying/picking other moves, and it would make sense to push it up in the move ordering for future positions not to be as late. Passed STC: https://tests.stockfishchess.org/tests/view/66f0f69186d5ee47d953b2aa LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 34144 W: 9024 L: 8709 D: 16411 Ptnml(0-2): 137, 3875, 8732, 4192, 136 Passed LTC: https://tests.stockfishchess.org/tests/view/66f1d84a86d5ee47d953b325 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 62808 W: 16054 L: 15684 D: 31070 Ptnml(0-2): 24, 6725, 17555, 7057, 43 closes https://github.com/official-stockfish/Stockfish/pull/5608 bench: 1452807 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index d87a6b9a..7d84bd38 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1201,8 +1201,8 @@ moves_loop: // When in check, search starts here value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth, !cutNode); // Post LMR continuation history updates (~1 Elo) - int bonus = value >= beta ? stat_bonus(newDepth) : -stat_malus(newDepth); - + int bonus = value >= beta ? 
(1 + 2 * (moveCount > depth)) * stat_bonus(newDepth) + : -stat_malus(newDepth); update_continuation_histories(ss, movedPiece, move.to_sq(), bonus); } } From d6043970bd156b1d2ab6cb51e8d5cb0c6a40797c Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Tue, 17 Sep 2024 14:29:55 -0700 Subject: [PATCH 258/315] Make Correction History Size Uniform Passed Non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 207232 W: 53834 L: 53802 D: 99596 Ptnml(0-2): 695, 24486, 53200, 24562, 673 https://tests.stockfishchess.org/tests/view/66e9f5a886d5ee47d953ada1 Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 99120 W: 25264 L: 25123 D: 48733 Ptnml(0-2): 66, 10803, 27675, 10956, 60 https://tests.stockfishchess.org/tests/view/66ed7ebc86d5ee47d953b056 Passed Non-regression LTC vs #5606: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 208950 W: 53049 L: 53019 D: 102882 Ptnml(0-2): 111, 23232, 57760, 23260, 112 https://tests.stockfishchess.org/tests/view/66f1843886d5ee47d953b2f2 closes https://github.com/official-stockfish/Stockfish/pull/5609 bench 1575189 --- src/movepick.h | 32 ++++++++++++++------------------ src/search.cpp | 3 +-- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/movepick.h b/src/movepick.h index 13b9635b..c5e565fe 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -34,18 +34,14 @@ namespace Stockfish { -constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 -constexpr int PAWN_CORRECTION_HISTORY_SIZE = 16384; // has to be a power of 2 -constexpr int MATERIAL_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 -constexpr int MAJOR_PIECE_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 -constexpr int MINOR_PIECE_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 -constexpr int NON_PAWN_CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 -constexpr int CORRECTION_HISTORY_LIMIT = 1024; +constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 +constexpr int CORRECTION_HISTORY_SIZE = 
32768; // has to be a power of 2 +constexpr int CORRECTION_HISTORY_LIMIT = 1024; static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0, "PAWN_HISTORY_SIZE has to be a power of 2"); -static_assert((PAWN_CORRECTION_HISTORY_SIZE & (PAWN_CORRECTION_HISTORY_SIZE - 1)) == 0, +static_assert((CORRECTION_HISTORY_SIZE & (CORRECTION_HISTORY_SIZE - 1)) == 0, "CORRECTION_HISTORY_SIZE has to be a power of 2"); enum PawnHistoryType { @@ -55,24 +51,24 @@ enum PawnHistoryType { template inline int pawn_structure_index(const Position& pos) { - return pos.pawn_key() & ((T == Normal ? PAWN_HISTORY_SIZE : PAWN_CORRECTION_HISTORY_SIZE) - 1); + return pos.pawn_key() & ((T == Normal ? PAWN_HISTORY_SIZE : CORRECTION_HISTORY_SIZE) - 1); } inline int material_index(const Position& pos) { - return pos.material_key() & (MATERIAL_CORRECTION_HISTORY_SIZE - 1); + return pos.material_key() & (CORRECTION_HISTORY_SIZE - 1); } inline int major_piece_index(const Position& pos) { - return pos.major_piece_key() & (MAJOR_PIECE_CORRECTION_HISTORY_SIZE - 1); + return pos.major_piece_key() & (CORRECTION_HISTORY_SIZE - 1); } inline int minor_piece_index(const Position& pos) { - return pos.minor_piece_key() & (MINOR_PIECE_CORRECTION_HISTORY_SIZE - 1); + return pos.minor_piece_key() & (CORRECTION_HISTORY_SIZE - 1); } template inline int non_pawn_index(const Position& pos) { - return pos.non_pawn_key(c) & (NON_PAWN_CORRECTION_HISTORY_SIZE - 1); + return pos.non_pawn_key(c) & (CORRECTION_HISTORY_SIZE - 1); } // StatsEntry stores the stat table value. 
It is usually a number but could @@ -161,23 +157,23 @@ using PawnHistory = Stats // PawnCorrectionHistory is addressed by color and pawn structure using PawnCorrectionHistory = - Stats; + Stats; // MaterialCorrectionHistory is addressed by color and material configuration using MaterialCorrectionHistory = - Stats; + Stats; // MajorPieceCorrectionHistory is addressed by color and king/major piece (Queen, Rook) positions using MajorPieceCorrectionHistory = - Stats; + Stats; // MinorPieceCorrectionHistory is addressed by color and king/minor piece (Knight, Bishop) positions using MinorPieceCorrectionHistory = - Stats; + Stats; // NonPawnCorrectionHistory is addressed by color and non-pawn material positions using NonPawnCorrectionHistory = - Stats; + Stats; // The MovePicker class is used to pick one pseudo-legal move at a time from the // current position. The most important method is next_move(), which emits one diff --git a/src/search.cpp b/src/search.cpp index 7d84bd38..4d581a85 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -88,8 +88,7 @@ Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { const auto wnpcv = w.nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)]; const auto bnpcv = w.nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)]; const auto cv = - (99916 * pcv + 55067 * mcv + 55530 * macv + 95324 * micv + 105056 * (wnpcv + bnpcv)) - / 2097152; + (6245 * pcv + 3442 * mcv + 3471 * macv + 5958 * micv + 6566 * (wnpcv + bnpcv)) / 131072; v += cv; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } From c85f802185dd223bae1197269d17b9b1d5e935a0 Mon Sep 17 00:00:00 2001 From: Taras Vuk <117687515+TarasVuk@users.noreply.github.com> Date: Mon, 30 Sep 2024 18:58:48 +0200 Subject: [PATCH 259/315] Tweak ttCapture reduction More reduction at shallow depth for quiet moves when ttMove is a capture. 
Passed STC: LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 365728 W: 95896 L: 95090 D: 174742 Ptnml(0-2): 1283, 43133, 93262, 43867, 1319 https://tests.stockfishchess.org/tests/view/66edd35986d5ee47d953b0d5 Passed LTC: LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 200526 W: 51197 L: 50540 D: 98789 Ptnml(0-2): 119, 21952, 55462, 22613, 117 https://tests.stockfishchess.org/tests/view/66f405dc86d5ee47d953b460 closes https://github.com/official-stockfish/Stockfish/pull/5610 bench: 1269487 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 4d581a85..a206cdda 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1157,7 +1157,7 @@ moves_loop: // When in check, search starts here // Increase reduction if ttMove is a capture but the current move is not a capture (~3 Elo) if (ttCapture && !capture) - r++; + r += 1 + (depth < 8); // Increase reduction if next ply has a lot of fail high (~5 Elo) if ((ss + 1)->cutoffCnt > 3) From 2b9154882a0e924c28d4de7b98309d889334428c Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Mon, 16 Sep 2024 01:49:04 -0400 Subject: [PATCH 260/315] Tweak 7 eval params Values found from 120k / 120k spsa games at 30+0.3 Passed STC: https://tests.stockfishchess.org/tests/view/66ecd7ce86d5ee47d953b003 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 241312 W: 62994 L: 62373 D: 115945 Ptnml(0-2): 754, 28684, 61280, 29063, 875 Passed LTC: https://tests.stockfishchess.org/tests/view/66f1f3a286d5ee47d953b331 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 304896 W: 77580 L: 76709 D: 150607 Ptnml(0-2): 198, 33413, 84360, 34274, 203 closes https://github.com/official-stockfish/Stockfish/pull/5611 bench 1173651 --- src/evaluate.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 221ccde8..087765e3 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -77,11 +77,11 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, // Blend optimism and 
eval with nnue complexity int nnueComplexity = std::abs(psqt - positional); - optimism += optimism * nnueComplexity / (smallNet ? 433 : 453); - nnue -= nnue * nnueComplexity / (smallNet ? 18815 : 17864); + optimism += optimism * nnueComplexity / (smallNet ? 430 : 474); + nnue -= nnue * nnueComplexity / (smallNet ? 20233 : 17879); int material = (smallNet ? 553 : 532) * pos.count() + pos.non_pawn_material(); - v = (nnue * (73921 + material) + optimism * (8112 + material)) / (smallNet ? 68104 : 74715); + v = (nnue * (76898 + material) + optimism * (8112 + material)) / (smallNet ? 74411 : 76256); // Evaluation grain (to get more alpha-beta cuts) with randomization (for robustness) v = (v / 16) * 16 - 1 + (pos.key() & 0x2); From 0186904f53e6b9c90935cd8fe822da795ca9d333 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sun, 29 Sep 2024 04:22:37 -0400 Subject: [PATCH 261/315] Remove evaluation grain Passed non-regression STC: https://tests.stockfishchess.org/tests/view/66fa345a86d5ee47d953b86e LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 39776 W: 10528 L: 10306 D: 18942 Ptnml(0-2): 134, 4674, 10063, 4870, 147 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/66facfb886d5ee47d953b8a8 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 64230 W: 16484 L: 16305 D: 31441 Ptnml(0-2): 38, 7195, 17483, 7348, 51 closes https://github.com/official-stockfish/Stockfish/pull/5613 bench 1013135 --- src/evaluate.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 087765e3..b1c7283e 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -83,9 +83,6 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, int material = (smallNet ? 553 : 532) * pos.count() + pos.non_pawn_material(); v = (nnue * (76898 + material) + optimism * (8112 + material)) / (smallNet ? 
74411 : 76256); - // Evaluation grain (to get more alpha-beta cuts) with randomization (for robustness) - v = (v / 16) * 16 - 1 + (pos.key() & 0x2); - // Damp down the evaluation linearly when shuffling v -= v * pos.rule50_count() / 212; From 7ac745a736a37f69632d6612d422aa3127f85509 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Wed, 2 Oct 2024 10:49:23 +0300 Subject: [PATCH 262/315] Refactor root history into low ply history This patch changes root history to low ply history - butterfly history for plies < 4. Doubles the weight of this history at the root; later plies have a lesser effect. Passed STC: https://tests.stockfishchess.org/tests/view/66f77d2386d5ee47d953b65d LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 180992 W: 47362 L: 46830 D: 86800 Ptnml(0-2): 554, 21499, 45928, 21891, 624 Passed LTC: https://tests.stockfishchess.org/tests/view/66fb557986d5ee47d953b8e5 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 42462 W: 11013 L: 10682 D: 20767 Ptnml(0-2): 33, 4518, 11795, 4855, 30 closes https://github.com/official-stockfish/Stockfish/pull/5614 Bench 1264335 --- src/movepick.cpp | 12 +++++----- src/movepick.h | 12 ++++++---- src/search.cpp | 62 +++++++++++++++++++----------------------------- src/search.h | 2 +- 4 files changed, 40 insertions(+), 48 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index f4ef0e54..1d1aef0f 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -82,20 +82,20 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, - const ButterflyHistory* rh, + const LowPlyHistory* lph, const CapturePieceToHistory* cph, const PieceToHistory** ch, const PawnHistory* ph, - bool rn) : + int pl) : pos(p), mainHistory(mh), - rootHistory(rh), + lowPlyHistory(lph), captureHistory(cph), continuationHistory(ch), pawnHistory(ph), ttMove(ttm), depth(d), - rootNode(rn) { + ply(pl) { if (pos.checkers()) stage = EVASION_TT + !(ttm && pos.pseudo_legal(ttm)); @@ -179,8 +179,8 @@ void MovePicker::score() { : pt == ROOK && 
bool(to & threatenedByMinor) ? 24335 : 0); - if (rootNode) - m.value += 4 * (*rootHistory)[pos.side_to_move()][m.from_to()]; + if (ply < 4) + m.value += 8 * (*lowPlyHistory)[ply][m.from_to()] / (1 + 2 * ply); } else // Type == EVASIONS diff --git a/src/movepick.h b/src/movepick.h index c5e565fe..8deefd14 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -135,6 +135,10 @@ enum StatsType { // see https://www.chessprogramming.org/Butterfly_Boards (~11 elo) using ButterflyHistory = Stats; +// LowPlyHistory is adressed by play and move's from and to squares, used +// to improve move ordering near the root +using LowPlyHistory = Stats; + // CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] using CapturePieceToHistory = Stats; @@ -195,11 +199,11 @@ class MovePicker { Move, Depth, const ButterflyHistory*, - const ButterflyHistory*, + const LowPlyHistory*, const CapturePieceToHistory*, const PieceToHistory**, const PawnHistory*, - bool); + int); MovePicker(const Position&, Move, int, const CapturePieceToHistory*); Move next_move(bool skipQuiets = false); @@ -213,7 +217,7 @@ class MovePicker { const Position& pos; const ButterflyHistory* mainHistory; - const ButterflyHistory* rootHistory; + const LowPlyHistory* lowPlyHistory; const CapturePieceToHistory* captureHistory; const PieceToHistory** continuationHistory; const PawnHistory* pawnHistory; @@ -222,7 +226,7 @@ class MovePicker { int stage; int threshold; Depth depth; - bool rootNode; + int ply; ExtMove moves[MAX_MOVES]; }; diff --git a/src/search.cpp b/src/search.cpp index a206cdda..0ed7b6a7 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -105,21 +105,16 @@ Value value_to_tt(Value v, int ply); Value value_from_tt(Value v, int ply, int r50c); void update_pv(Move* pv, Move move, const Move* childPv); void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); -void update_quiet_histories(const Position& pos, - Stack* ss, - Search::Worker& workerThread, - Move move, - 
int bonus, - bool rootNode); -void update_all_stats(const Position& pos, - Stack* ss, - Search::Worker& workerThread, - Move bestMove, - Square prevSq, - ValueList& quietsSearched, - ValueList& capturesSearched, - Depth depth, - bool rootNode); +void update_quiet_histories( + const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); +void update_all_stats(const Position& pos, + Stack* ss, + Search::Worker& workerThread, + Move bestMove, + Square prevSq, + ValueList& quietsSearched, + ValueList& capturesSearched, + Depth depth); } // namespace @@ -273,7 +268,7 @@ void Search::Worker::iterative_deepening() { int searchAgainCounter = 0; - rootHistory.fill(0); + lowPlyHistory.fill(0); // Iterative deepening loop until requested to stop or the target depth is reached while (++rootDepth < MAX_PLY && !threads.stop @@ -499,7 +494,7 @@ void Search::Worker::iterative_deepening() { // Reset histories, usually before a new game void Search::Worker::clear() { mainHistory.fill(0); - rootHistory.fill(0); + lowPlyHistory.fill(0); captureHistory.fill(-753); pawnHistory.fill(-1152); pawnCorrectionHistory.fill(0); @@ -638,7 +633,7 @@ Value Search::Worker::search( { // Bonus for a quiet ttMove that fails high (~2 Elo) if (!ttCapture) - update_quiet_histories(pos, ss, *this, ttData.move, stat_bonus(depth), rootNode); + update_quiet_histories(pos, ss, *this, ttData.move, stat_bonus(depth)); // Extra penalty for early quiet moves of // the previous ply (~1 Elo on STC, ~2 Elo on LTC) @@ -928,8 +923,8 @@ moves_loop: // When in check, search starts here (ss - 6)->continuationHistory}; - MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->rootHistory, - &thisThread->captureHistory, contHist, &thisThread->pawnHistory, rootNode); + MovePicker mp(pos, ttData.move, depth, &thisThread->mainHistory, &thisThread->lowPlyHistory, + &thisThread->captureHistory, contHist, &thisThread->pawnHistory, ss->ply); value = bestValue; @@ -1355,8 +1350,7 @@ 
moves_loop: // When in check, search starts here // If there is a move that produces search value greater than alpha, // we update the stats of searched moves. else if (bestMove) - update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, capturesSearched, depth, - rootNode); + update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, capturesSearched, depth); // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) @@ -1557,9 +1551,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) // Initialize a MovePicker object for the current position, and prepare to search // the moves. We presently use two stages of move generator in quiescence search: // captures, or evasions only when in check. - MovePicker mp(pos, ttData.move, DEPTH_QS, &thisThread->mainHistory, &thisThread->rootHistory, - &thisThread->captureHistory, contHist, &thisThread->pawnHistory, - nodeType == Root); + MovePicker mp(pos, ttData.move, DEPTH_QS, &thisThread->mainHistory, &thisThread->lowPlyHistory, + &thisThread->captureHistory, contHist, &thisThread->pawnHistory, ss->ply); // Step 5. Loop through all pseudo-legal moves until no moves remain or a beta // cutoff occurs. 
@@ -1768,8 +1761,7 @@ void update_all_stats(const Position& pos, Square prevSq, ValueList& quietsSearched, ValueList& capturesSearched, - Depth depth, - bool rootNode) { + Depth depth) { CapturePieceToHistory& captureHistory = workerThread.captureHistory; Piece moved_piece = pos.moved_piece(bestMove); @@ -1780,11 +1772,11 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - update_quiet_histories(pos, ss, workerThread, bestMove, quietMoveBonus, rootNode); + update_quiet_histories(pos, ss, workerThread, bestMove, quietMoveBonus); // Decrease stats for all non-best quiet moves for (Move move : quietsSearched) - update_quiet_histories(pos, ss, workerThread, move, -quietMoveMalus, rootNode); + update_quiet_histories(pos, ss, workerThread, move, -quietMoveMalus); } else { @@ -1826,17 +1818,13 @@ void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { // Updates move sorting heuristics -void update_quiet_histories(const Position& pos, - Stack* ss, - Search::Worker& workerThread, - Move move, - int bonus, - bool rootNode) { +void update_quiet_histories( + const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { Color us = pos.side_to_move(); workerThread.mainHistory[us][move.from_to()] << bonus; - if (rootNode) - workerThread.rootHistory[us][move.from_to()] << bonus; + if (ss->ply < 4) + workerThread.lowPlyHistory[ss->ply][move.from_to()] << bonus; update_continuation_histories(ss, pos.moved_piece(move), move.to_sq(), bonus); diff --git a/src/search.h b/src/search.h index d7a909a8..0761328a 100644 --- a/src/search.h +++ b/src/search.h @@ -278,7 +278,7 @@ class Worker { // Public because they need to be updatable by the stats ButterflyHistory mainHistory; - ButterflyHistory rootHistory; + LowPlyHistory lowPlyHistory; CapturePieceToHistory captureHistory; ContinuationHistory continuationHistory[2][2]; From 81c1d310844e7b41caeabda0d5351ae275d799db Mon Sep 17 00:00:00 2001 From: Taras Vuk 
<117687515+TarasVuk@users.noreply.github.com> Date: Wed, 2 Oct 2024 15:55:59 +0200 Subject: [PATCH 263/315] Decrease probCutBeta based on opponentWorsening Passed STC: LLR: 2.97 (-2.94,2.94) <0.00,2.00> Total: 62112 W: 16305 L: 15947 D: 29860 Ptnml(0-2): 203, 7226, 15856, 7552, 219 https://tests.stockfishchess.org/tests/view/66f85fc986d5ee47d953b71e Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 129552 W: 33223 L: 32710 D: 63619 Ptnml(0-2): 94, 14250, 35573, 14767, 92 https://tests.stockfishchess.org/tests/view/66f93fef86d5ee47d953b7d2 closes https://github.com/official-stockfish/Stockfish/pull/5615 bench: 1511354 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 0ed7b6a7..d79b452d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -840,7 +840,7 @@ Value Search::Worker::search( // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search // returns a value much above beta, we can (almost) safely prune the previous move. - probCutBeta = beta + 189 - 53 * improving; + probCutBeta = beta + 189 - 53 * improving - 30 * opponentWorsening; if (!PvNode && depth > 3 && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY // If value from transposition table is lower than probCutBeta, don't attempt From 6592b13d56e43c247ac8b0d6f62564b2a4ca72a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Faruk=20Tutkun?= Date: Fri, 4 Oct 2024 17:46:47 +0300 Subject: [PATCH 264/315] Introduce Continuation Correction History Continuation correction history uses last 2 move to correct static eval. ContCorrHist first introduced by @martinnovaak in Motor(https://github.com/martinnovaak/motor/pull/162). 
An earlier idea of using the last move to correct the eval was introduced by @MinusKelvin in Ice4 (https://github.com/MinusKelvin/ice4/commit/45daf7d9ea64ea4efaf0d2b4e99f53e12e08c838) Passed STC: LLR: 2.96 (-2.94,2.94) <0.00,2.00> Total: 310144 W: 81267 L: 80538 D: 148339 Ptnml(0-2): 1160, 36607, 78834, 37286, 1185 https://tests.stockfishchess.org/tests/view/66f96cbc86d5ee47d953b7f7 Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 97470 W: 24892 L: 24447 D: 48131 Ptnml(0-2): 63, 10631, 26915, 11050, 76 https://tests.stockfishchess.org/tests/view/66fd59bc86d5ee47d953b9ea closes https://github.com/official-stockfish/Stockfish/pull/5617 Bench: 1143382 --- AUTHORS | 1 + src/movepick.h | 7 +++++++ src/search.cpp | 47 ++++++++++++++++++++++++++++++++++++----------- src/search.h | 36 +++++++++++++++++++----------------- 4 files changed, 63 insertions(+), 28 deletions(-) diff --git a/AUTHORS b/AUTHORS index c0a8beeb..725b3569 100644 --- a/AUTHORS +++ b/AUTHORS @@ -176,6 +176,7 @@ Ofek Shochat (OfekShochat, ghostway) Ondrej Mosnáček (WOnder93) Ondřej Mišina (AndrovT) Oskar Werkelin Ahlin +Ömer Faruk Tutkun (OmerFarukTutkun) Pablo Vazquez Panthee Pascal Romaret diff --git a/src/movepick.h b/src/movepick.h index 8deefd14..9a68aaa1 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -145,6 +145,9 @@ using CapturePieceToHistory = Stats; +// PieceToCorrectionHistory is addressed by a move's [piece][to] +using PieceToCorrectionHistory = Stats; + // ContinuationHistory is the combined history of a given pair of moves, usually // the current one given a previous one. The nested history table is based on // PieceToHistory instead of ButterflyBoards. @@ -179,6 +182,10 @@ using MinorPieceCorrectionHistory = using NonPawnCorrectionHistory = Stats; +// ContinuationCorrectionHistory is the combined correction history of a given pair of moves +using ContinuationCorrectionHistory = + Stats; + // The MovePicker class is used to pick one pseudo-legal move at a time from the // current position. 
The most important method is next_move(), which emits one // new pseudo-legal move on every call, until there are no moves left, when diff --git a/src/search.cpp b/src/search.cpp index d79b452d..c55118ec 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -79,16 +79,23 @@ constexpr int futility_move_count(bool improving, Depth depth) { // Add correctionHistory value to raw staticEval and guarantee evaluation // does not hit the tablebase range. -Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { +Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos, Stack* ss) { const Color us = pos.side_to_move(); + const auto m = (ss - 1)->currentMove; const auto pcv = w.pawnCorrectionHistory[us][pawn_structure_index(pos)]; const auto mcv = w.materialCorrectionHistory[us][material_index(pos)]; const auto macv = w.majorPieceCorrectionHistory[us][major_piece_index(pos)]; const auto micv = w.minorPieceCorrectionHistory[us][minor_piece_index(pos)]; const auto wnpcv = w.nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)]; const auto bnpcv = w.nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)]; - const auto cv = - (6245 * pcv + 3442 * mcv + 3471 * macv + 5958 * micv + 6566 * (wnpcv + bnpcv)) / 131072; + int cntcv = 1; + + if (m.is_ok()) + cntcv = int((*(ss - 2)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()]); + + const auto cv = + (5932 * pcv + 2994 * mcv + 3269 * macv + 5660 * micv + 6237 * (wnpcv + bnpcv) + cntcv * 5555) + / 131072; v += cv; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } @@ -240,7 +247,8 @@ void Search::Worker::iterative_deepening() { { (ss - i)->continuationHistory = &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel - (ss - i)->staticEval = VALUE_NONE; + (ss - i)->continuationCorrectionHistory = &this->continuationCorrectionHistory[NO_PIECE][0]; + (ss - i)->staticEval = VALUE_NONE; } for (int i = 0; i <= MAX_PLY + 2; ++i) @@ 
-504,6 +512,10 @@ void Search::Worker::clear() { nonPawnCorrectionHistory[WHITE].fill(0); nonPawnCorrectionHistory[BLACK].fill(0); + for (auto& to : continuationCorrectionHistory) + for (auto& h : to) + h->fill(0); + for (bool inCheck : {false, true}) for (StatsType c : {NoCaptures, Captures}) for (auto& to : continuationHistory[inCheck][c]) @@ -727,7 +739,8 @@ Value Search::Worker::search( else if (PvNode) Eval::NNUE::hint_common_parent_position(pos, networks[numaAccessToken], refreshTable); - ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); + ss->staticEval = eval = + to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos, ss); // ttValue can be used as a better position evaluation (~7 Elo) if (ttData.value != VALUE_NONE @@ -738,7 +751,8 @@ Value Search::Worker::search( { unadjustedStaticEval = evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]); - ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); + ss->staticEval = eval = + to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos, ss); // Static evaluation is saved as it was before adjustment by correction history ttWriter.write(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_UNSEARCHED, Move::none(), @@ -793,8 +807,9 @@ Value Search::Worker::search( // Null move dynamic reduction based on depth and eval Depth R = std::min(int(eval - beta) / 209, 6) + depth / 3 + 5; - ss->currentMove = Move::null(); - ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; + ss->currentMove = Move::null(); + ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; + ss->continuationCorrectionHistory = &thisThread->continuationCorrectionHistory[NO_PIECE][0]; pos.do_null_move(st, tt); @@ -876,6 +891,8 @@ Value Search::Worker::search( ss->currentMove = move; ss->continuationHistory = 
&this->continuationHistory[ss->inCheck][true][pos.moved_piece(move)][move.to_sq()]; + ss->continuationCorrectionHistory = + &this->continuationCorrectionHistory[pos.moved_piece(move)][move.to_sq()]; thisThread->nodes.fetch_add(1, std::memory_order_relaxed); pos.do_move(move, st); @@ -1124,7 +1141,8 @@ moves_loop: // When in check, search starts here ss->currentMove = move; ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck][capture][movedPiece][move.to_sq()]; - + ss->continuationCorrectionHistory = + &thisThread->continuationCorrectionHistory[movedPiece][move.to_sq()]; uint64_t nodeCount = rootNode ? uint64_t(nodes) : 0; // Step 16. Make the move @@ -1401,6 +1419,8 @@ moves_loop: // When in check, search starts here && !(bestValue >= beta && bestValue <= ss->staticEval) && !(!bestMove && bestValue >= ss->staticEval)) { + const auto m = (ss - 1)->currentMove; + auto bonus = std::clamp(int(bestValue - ss->staticEval) * depth / 8, -CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4); thisThread->pawnCorrectionHistory[us][pawn_structure_index(pos)] @@ -1412,6 +1432,9 @@ moves_loop: // When in check, search starts here << bonus * 123 / 128; thisThread->nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)] << bonus * 165 / 128; + + if (m.is_ok()) + (*(ss - 2)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()] << bonus; } assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); @@ -1507,7 +1530,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) unadjustedStaticEval = evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]); ss->staticEval = bestValue = - to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); + to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos, ss); // ttValue can be used as a better position evaluation (~13 Elo) if (std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY @@ -1522,7 +1545,7 @@ Value 
Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) ? evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]) : -(ss - 1)->staticEval; ss->staticEval = bestValue = - to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); + to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos, ss); } // Stand pat. Return immediately if static value is at least beta @@ -1619,6 +1642,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) ss->continuationHistory = &thisThread ->continuationHistory[ss->inCheck][capture][pos.moved_piece(move)][move.to_sq()]; + ss->continuationCorrectionHistory = + &thisThread->continuationCorrectionHistory[pos.moved_piece(move)][move.to_sq()]; // Step 7. Make and search the move thisThread->nodes.fetch_add(1, std::memory_order_relaxed); diff --git a/src/search.h b/src/search.h index 0761328a..a407e105 100644 --- a/src/search.h +++ b/src/search.h @@ -61,18 +61,19 @@ namespace Search { // shallower and deeper in the tree during the search. Each search thread has // its own array of Stack objects, indexed by the current ply. 
struct Stack { - Move* pv; - PieceToHistory* continuationHistory; - int ply; - Move currentMove; - Move excludedMove; - Value staticEval; - int statScore; - int moveCount; - bool inCheck; - bool ttPv; - bool ttHit; - int cutoffCnt; + Move* pv; + PieceToHistory* continuationHistory; + PieceToCorrectionHistory* continuationCorrectionHistory; + int ply; + Move currentMove; + Move excludedMove; + Value staticEval; + int statScore; + int moveCount; + bool inCheck; + bool ttPv; + bool ttHit; + int cutoffCnt; }; @@ -284,11 +285,12 @@ class Worker { ContinuationHistory continuationHistory[2][2]; PawnHistory pawnHistory; - PawnCorrectionHistory pawnCorrectionHistory; - MaterialCorrectionHistory materialCorrectionHistory; - MajorPieceCorrectionHistory majorPieceCorrectionHistory; - MinorPieceCorrectionHistory minorPieceCorrectionHistory; - NonPawnCorrectionHistory nonPawnCorrectionHistory[COLOR_NB]; + PawnCorrectionHistory pawnCorrectionHistory; + MaterialCorrectionHistory materialCorrectionHistory; + MajorPieceCorrectionHistory majorPieceCorrectionHistory; + MinorPieceCorrectionHistory minorPieceCorrectionHistory; + NonPawnCorrectionHistory nonPawnCorrectionHistory[COLOR_NB]; + ContinuationCorrectionHistory continuationCorrectionHistory; private: void iterative_deepening(); From e046c4ef0d743ce57c97c0d40f17610dc2ec3c56 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Mon, 30 Sep 2024 23:53:57 -0400 Subject: [PATCH 265/315] Simplify evaluation scaling Set digits in adjusted eval params all to 7. 
Passed non-regression STC: https://tests.stockfishchess.org/tests/view/66fc493d86d5ee47d953b94c LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 57696 W: 15098 L: 14898 D: 27700 Ptnml(0-2): 205, 6784, 14678, 6968, 213 Passed non-regression LTC: https://tests.stockfishchess.org/tests/view/66fd4b9386d5ee47d953b9d5 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 93786 W: 23868 L: 23721 D: 46197 Ptnml(0-2): 55, 10322, 25993, 10467, 56 closes https://github.com/official-stockfish/Stockfish/pull/5618 Bench: 1277182 --- src/evaluate.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index b1c7283e..802913a0 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -59,9 +59,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, assert(!pos.checkers()); - bool smallNet = use_smallnet(pos); - int v; - + bool smallNet = use_smallnet(pos); auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, &caches.small) : networks.big.evaluate(pos, &caches.big); @@ -81,7 +79,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, nnue -= nnue * nnueComplexity / (smallNet ? 20233 : 17879); int material = (smallNet ? 553 : 532) * pos.count() + pos.non_pawn_material(); - v = (nnue * (76898 + material) + optimism * (8112 + material)) / (smallNet ? 
74411 : 76256); + int v = (nnue * (77777 + material) + optimism * (7777 + material)) / 77777; // Damp down the evaluation linearly when shuffling v -= v * pos.rule50_count() / 212; From dce72913feec523f077db8e86cc5797286c6548d Mon Sep 17 00:00:00 2001 From: Disservin Date: Fri, 4 Oct 2024 19:36:02 +0200 Subject: [PATCH 266/315] Temporarily fix clang-format mismatch closes https://github.com/official-stockfish/Stockfish/pull/5620 No functional change --- src/uci.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/uci.cpp b/src/uci.cpp index cfb34db7..8388cad8 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -431,6 +431,8 @@ void UCIEngine::benchmark(std::istream& args) { if (threadBinding.empty()) threadBinding = "none"; + // clang-format off + std::cerr << "===========================" << "\nVersion : " << engine_version_info() @@ -453,6 +455,8 @@ void UCIEngine::benchmark(std::istream& args) { << "\nTotal search time [s] : " << totalTime / 1000.0 << "\nNodes/second : " << 1000 * nodes / totalTime << std::endl; + // clang-format on + init_search_update_listeners(); } From 3348603770926e9865fc3f43baaaef8de99d3014 Mon Sep 17 00:00:00 2001 From: mstembera Date: Fri, 4 Oct 2024 10:39:51 -0700 Subject: [PATCH 267/315] Simplify previous #5608 https://github.com/official-stockfish/Stockfish/pull/5608 STC: https://tests.stockfishchess.org/tests/view/66fb1bab86d5ee47d953b8cc LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 25536 W: 6797 L: 6560 D: 12179 Ptnml(0-2): 93, 2953, 6460, 3148, 114 LTC https://tests.stockfishchess.org/tests/view/66fb690e86d5ee47d953b8eb LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 225114 W: 57200 L: 57188 D: 110726 Ptnml(0-2): 197, 25076, 61995, 25096, 193 closes https://github.com/official-stockfish/Stockfish/pull/5621 Bench: 1570076 --- src/search.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c55118ec..34fb5a80 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1213,8 +1213,7 @@ 
moves_loop: // When in check, search starts here value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth, !cutNode); // Post LMR continuation history updates (~1 Elo) - int bonus = value >= beta ? (1 + 2 * (moveCount > depth)) * stat_bonus(newDepth) - : -stat_malus(newDepth); + int bonus = value >= beta ? 3 * stat_bonus(newDepth) : -stat_malus(newDepth); update_continuation_histories(ss, movedPiece, move.to_sq(), bonus); } } From 9a21e3e9968ebdd36c24d9b2762646a76a4e448b Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sat, 5 Oct 2024 13:52:24 +0300 Subject: [PATCH 268/315] Simplify bestvalue formula Passed STC: LLR: 2.97 (-2.94,2.94) <-1.75,0.25> Total: 163680 W: 42689 L: 42605 D: 78386 Ptnml(0-2): 619, 19555, 41386, 19683, 597 https://tests.stockfishchess.org/tests/view/66f9451386d5ee47d953b7d9 Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 96498 W: 24582 L: 24438 D: 47478 Ptnml(0-2): 62, 10642, 26718, 10744, 83 https://tests.stockfishchess.org/tests/view/66fd765786d5ee47d953ba1c closes https://github.com/official-stockfish/Stockfish/pull/5622 Bench: 1309815 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 34fb5a80..5598b5ff 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1610,11 +1610,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) continue; } - // if static exchange evaluation is low enough + // If static exchange evaluation is low enough // we can prune this move. (~2 Elo) if (!pos.see_ge(move, alpha - futilityBase)) { - bestValue = (futilityBase > alpha) ? alpha : std::max(bestValue, futilityBase); + bestValue = std::min(alpha, futilityBase); continue; } } From 76923bb6fef2982dbce201227f6a33788390ce35 Mon Sep 17 00:00:00 2001 From: mstembera Date: Sat, 5 Oct 2024 16:18:21 -0700 Subject: [PATCH 269/315] Optimize magics Reduce the size of the Magics table by half on modern cpu's and lay it out to match our access pattern. 
Namely we typically access the magics for the same square for both bishop and rook back to back so we want those to be in the same cache line. https://tests.stockfishchess.org/tests/view/6701c9b386d5ee47d953bcf4 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 121664 W: 31931 L: 31497 D: 58236 Ptnml(0-2): 395, 13658, 32322, 14032, 425 A similar patch minus the size reduction finished yellow https://tests.stockfishchess.org/tests/view/6695f03f4ff211be9d4ec16c LLR: -2.94 (-2.94,2.94) <0.00,2.00> Total: 310688 W: 80940 L: 80746 D: 149002 Ptnml(0-2): 1119, 35032, 82846, 35230, 1117 closes https://github.com/official-stockfish/Stockfish/pull/5623 No functional change --- src/bitboard.cpp | 40 +++++++++++++++++++++++----------------- src/bitboard.h | 21 ++++++++++++--------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/src/bitboard.cpp b/src/bitboard.cpp index a8b4e5f4..deda6da2 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -34,15 +34,14 @@ Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; Bitboard PawnAttacks[COLOR_NB][SQUARE_NB]; -Magic RookMagics[SQUARE_NB]; -Magic BishopMagics[SQUARE_NB]; +alignas(64) Magic Magics[SQUARE_NB][2]; namespace { Bitboard RookTable[0x19000]; // To store rook attacks Bitboard BishopTable[0x1480]; // To store bishop attacks -void init_magics(PieceType pt, Bitboard table[], Magic magics[]); +void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]); // Returns the bitboard of target square for the given step // from the given square. If the step is off the board, returns empty bitboard. 
@@ -82,8 +81,8 @@ void Bitboards::init() { for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); - init_magics(ROOK, RookTable, RookMagics); - init_magics(BISHOP, BishopTable, BishopMagics); + init_magics(ROOK, RookTable, Magics); + init_magics(BISHOP, BishopTable, Magics); for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) { @@ -142,39 +141,47 @@ Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { // bitboards are used to look up attacks of sliding pieces. As a reference see // https://www.chessprogramming.org/Magic_Bitboards. In particular, here we use // the so called "fancy" approach. -void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { +void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]) { +#ifndef USE_PEXT // Optimal PRNG seeds to pick the correct magics in the shortest time int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020}, {728, 10316, 55013, 32803, 12281, 15100, 16645, 255}}; - Bitboard occupancy[4096], reference[4096], edges, b; - int epoch[4096] = {}, cnt = 0, size = 0; + Bitboard occupancy[4096]; + int epoch[4096] = {}, cnt = 0; +#endif + Bitboard reference[4096]; + int size = 0; for (Square s = SQ_A1; s <= SQ_H8; ++s) { // Board edges are not considered in the relevant occupancies - edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s)); + Bitboard edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s)); // Given a square 's', the mask is the bitboard of sliding attacks from // 's' computed on an empty board. The index must be big enough to contain // all the attacks for each possible subset of the mask and so is 2 power // the number of 1s of the mask. Hence we deduce the size of the shift to // apply to the 64 or 32 bits word to get the index. - Magic& m = magics[s]; + Magic& m = magics[s][pt - BISHOP]; m.mask = sliding_attack(pt, s, 0) & ~edges; - m.shift = (Is64Bit ? 
64 : 32) - popcount(m.mask); - +#ifndef USE_PEXT + m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask); +#endif // Set the offset for the attacks table of the square. We have individual // table sizes for each square with "Fancy Magic Bitboards". - m.attacks = s == SQ_A1 ? table : magics[s - 1].attacks + size; + m.attacks = s == SQ_A1 ? table : magics[s - 1][pt - BISHOP].attacks + size; + size = 0; // Use Carry-Rippler trick to enumerate all subsets of masks[s] and // store the corresponding sliding attack bitboard in reference[]. - b = size = 0; + Bitboard b = 0; do { +#ifndef USE_PEXT occupancy[size] = b; +#endif reference[size] = sliding_attack(pt, s, b); if (HasPext) @@ -184,9 +191,7 @@ void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { b = (b - m.mask) & m.mask; } while (b); - if (HasPext) - continue; - +#ifndef USE_PEXT PRNG rng(seeds[Is64Bit][rank_of(s)]); // Find a magic for square 's' picking up an (almost) random number @@ -215,6 +220,7 @@ void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { break; } } +#endif } } } diff --git a/src/bitboard.h b/src/bitboard.h index cdff4c75..c4bf18b5 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -67,27 +67,31 @@ extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB]; // Magic holds all magic bitboards relevant data for a single square struct Magic { Bitboard mask; - Bitboard magic; Bitboard* attacks; - unsigned shift; +#ifndef USE_PEXT + Bitboard magic; + unsigned shift; +#endif // Compute the attack's index using the 'magic bitboards' approach unsigned index(Bitboard occupied) const { - if (HasPext) - return unsigned(pext(occupied, mask)); - +#ifdef USE_PEXT + return unsigned(pext(occupied, mask)); +#else if (Is64Bit) return unsigned(((occupied & mask) * magic) >> shift); unsigned lo = unsigned(occupied) & unsigned(mask); unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32); return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift; +#endif } + + Bitboard attacks_bb(Bitboard 
occupied) const { return attacks[index(occupied)]; } }; -extern Magic RookMagics[SQUARE_NB]; -extern Magic BishopMagics[SQUARE_NB]; +extern Magic Magics[SQUARE_NB][2]; constexpr Bitboard square_bb(Square s) { assert(is_ok(s)); @@ -229,9 +233,8 @@ inline Bitboard attacks_bb(Square s, Bitboard occupied) { switch (Pt) { case BISHOP : - return BishopMagics[s].attacks[BishopMagics[s].index(occupied)]; case ROOK : - return RookMagics[s].attacks[RookMagics[s].index(occupied)]; + return Magics[s][Pt - BISHOP].attacks_bb(occupied); case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); default : From d4358ddba7184aa7403d12397f2f49f5ea6364fd Mon Sep 17 00:00:00 2001 From: Mathias Parnaudeau Date: Sat, 5 Oct 2024 15:28:39 +0200 Subject: [PATCH 270/315] Add autodetection of ppc64 architectures That allows 'make -j profile-build' to work on ppc64 architectures, setting the use of the appropriate SIMD extension, Altivec or VSX. For VSX, gcc allows mapping SSE2 intrinsics and so benefits from the existing SIMD code. On PowerMac G5, using altivec provides a performance improvement of 30%. On Talos 2, using vsx provides a performance improvement of 120%. 
closes https://github.com/official-stockfish/Stockfish/pull/5624 No functional change --- AUTHORS | 1 + scripts/get_native_properties.sh | 18 ++++++++++++++ src/Makefile | 42 ++++++++++++++++++++++++++++++-- 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/AUTHORS b/AUTHORS index 725b3569..31a64c17 100644 --- a/AUTHORS +++ b/AUTHORS @@ -143,6 +143,7 @@ Maciej Żenczykowski (zenczykowski) Malcolm Campbell (xoto10) Mark Tenzer (31m059) marotear +Mathias Parnaudeau (mparnaudeau) Matt Ginsberg (mattginsberg) Matthew Lai (matthewlai) Matthew Sullivan (Matt14916) diff --git a/scripts/get_native_properties.sh b/scripts/get_native_properties.sh index dfbfac0e..ed5fc9af 100755 --- a/scripts/get_native_properties.sh +++ b/scripts/get_native_properties.sh @@ -54,6 +54,20 @@ set_arch_x86_64() { fi } +set_arch_ppc_64() { + if $(grep -q -w "altivec" /proc/cpuinfo); then + power=$(grep -oP -m 1 'cpu\t+: POWER\K\d+' /proc/cpuinfo) + if [ "0$power" -gt 7 ]; then + # VSX started with POWER8 + true_arch='ppc-64-vsx' + else + true_arch='ppc-64-altivec' + fi + else + true_arch='ppc-64' + fi +} + # Check the system type uname_s=$(uname -s) uname_m=$(uname -m) @@ -87,6 +101,10 @@ case $uname_s in file_os='ubuntu' true_arch='x86-32' ;; + 'ppc64'*) + file_os='ubuntu' + set_arch_ppc_64 + ;; 'aarch64') file_os='android' true_arch='armv8' diff --git a/src/Makefile b/src/Makefile index 6cb778a6..15066781 100644 --- a/src/Makefile +++ b/src/Makefile @@ -98,6 +98,8 @@ VPATH = syzygy:nnue:nnue/features # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 # vnni256 = yes/no --- -mavx256vnni --- Use Intel Vector Neural Network Instructions 512 with 256bit operands # vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 +# altivec = yes/no --- -maltivec --- Use PowerPC Altivec SIMD extension +# vsx = yes/no --- -mvsx --- Use POWER VSX SIMD extension # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # dotprod = yes/no 
--- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions # lsx = yes/no --- -mlsx --- Use Loongson SIMD eXtension @@ -126,7 +128,7 @@ endif ifeq ($(ARCH), $(filter $(ARCH), \ x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-avxvnni x86-64-bmi2 \ x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ - x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 e2k \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-64-altivec ppc-64-vsx ppc-32 e2k \ armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64 \ loongarch64 loongarch64-lsx loongarch64-lasx)) SUPPORTED_ARCH=true @@ -151,6 +153,8 @@ avxvnni = no avx512 = no vnni256 = no vnni512 = no +altivec = no +vsx = no neon = no dotprod = no arm_version = 0 @@ -360,6 +364,20 @@ ifeq ($(ARCH),ppc-64) prefetch = yes endif +ifeq ($(ARCH),ppc-64-altivec) + arch = ppc64 + popcnt = yes + prefetch = yes + altivec = yes +endif + +ifeq ($(ARCH),ppc-64-vsx) + arch = ppc64 + popcnt = yes + prefetch = yes + vsx = yes +endif + ifeq ($(findstring e2k,$(ARCH)),e2k) arch = e2k mmx = yes @@ -650,7 +668,7 @@ else endif ifeq ($(popcnt),yes) - ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) + ifeq ($(arch),$(filter $(arch),ppc64 ppc64-altivec ppc64-vsx armv7 armv8 arm64)) CXXFLAGS += -DUSE_POPCNT else CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT @@ -720,6 +738,20 @@ ifeq ($(mmx),yes) endif endif +ifeq ($(altivec),yes) + CXXFLAGS += -maltivec + ifeq ($(COMP),gcc) + CXXFLAGS += -mabi=altivec + endif +endif + +ifeq ($(vsx),yes) + CXXFLAGS += -mvsx + ifeq ($(COMP),gcc) + CXXFLAGS += -DNO_WARN_X86_INTRINSICS -DUSE_SSE2 + endif +endif + ifeq ($(neon),yes) CXXFLAGS += -DUSE_NEON=$(arm_version) ifeq ($(KERNEL),Linux) @@ -852,6 +884,8 @@ help: @echo "x86-32-sse2 > x86 32-bit with sse2 support" @echo "x86-32 > x86 32-bit generic (with mmx compile support)" @echo "ppc-64 > PPC 64-bit" + @echo "ppc-64-altivec > PPC 64-bit with altivec support" + @echo "ppc-64-vsx 
> PPC 64-bit with vsx support" @echo "ppc-32 > PPC 32-bit" @echo "armv7 > ARMv7 32-bit" @echo "armv7-neon > ARMv7 32-bit with popcnt and neon" @@ -987,6 +1021,8 @@ config-sanity: net @echo "avx512: '$(avx512)'" @echo "vnni256: '$(vnni256)'" @echo "vnni512: '$(vnni512)'" + @echo "altivec: '$(altivec)'" + @echo "vsx: '$(vsx)'" @echo "neon: '$(neon)'" @echo "dotprod: '$(dotprod)'" @echo "arm_version: '$(arm_version)'" @@ -1020,6 +1056,8 @@ config-sanity: net @test "$(avx512)" = "yes" || test "$(avx512)" = "no" @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" + @test "$(altivec)" = "yes" || test "$(altivec)" = "no" + @test "$(vsx)" = "yes" || test "$(vsx)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no" @test "$(lsx)" = "yes" || test "$(lsx)" = "no" @test "$(lasx)" = "yes" || test "$(lasx)" = "no" From aaadbe0572e793cea9ebdf37e32c79235e4d573b Mon Sep 17 00:00:00 2001 From: Nonlinear2 <131959792+Nonlinear2@users.noreply.github.com> Date: Wed, 9 Oct 2024 20:00:19 +0200 Subject: [PATCH 271/315] Introduce mean squared score for delta adjustments This patch introduces the value `meanSquaredScore`, which makes the initial delta sensitive to unstable iterative deepening scores. 
Passed STC: https://tests.stockfishchess.org/tests/view/66fed74286d5ee47d953bb42 LLR: 2.98 (-2.94,2.94) <0.00,2.00> Total: 71104 W: 18635 L: 18262 D: 34207 Ptnml(0-2): 234, 8365, 17993, 8714, 246 Passed LTC: https://tests.stockfishchess.org/tests/view/6700088e86d5ee47d953bbe9 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 212544 W: 54238 L: 53560 D: 104746 Ptnml(0-2): 120, 23093, 59172, 23763, 124 closes https://github.com/official-stockfish/Stockfish/pull/5627 Bench: 1395505 --- src/search.cpp | 8 ++++++-- src/search.h | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5598b5ff..647bae76 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -312,8 +312,8 @@ void Search::Worker::iterative_deepening() { selDepth = 0; // Reset aspiration window starting size + delta = 5 + std::abs(rootMoves[pvIdx].meanSquaredScore) / 13797; Value avg = rootMoves[pvIdx].averageScore; - delta = 5 + avg * avg / 11797; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); @@ -1065,7 +1065,7 @@ moves_loop: // When in check, search starts here // (alpha, beta), then that move is singular and should be extended. To // verify this we do a reduced search on the position excluding the ttMove // and if the result is lower than ttValue minus a margin, then we will - // extend the ttMove. Recursive singular search is avoided. + // extend the ttMove. Recursive singular search is avoided. // Note: the depth margin and singularBeta margin are known for having // non-linear scaling. Their values are optimized to time controls of @@ -1265,6 +1265,10 @@ moves_loop: // When in check, search starts here rm.averageScore = rm.averageScore != -VALUE_INFINITE ? (value + rm.averageScore) / 2 : value; + rm.meanSquaredScore = rm.meanSquaredScore != -VALUE_INFINITE * VALUE_INFINITE + ? (value * std::abs(value) + rm.meanSquaredScore) / 2 + : value * std::abs(value); + // PV move or new best move? 
if (moveCount == 1 || value > alpha) { diff --git a/src/search.h b/src/search.h index a407e105..2342d9e9 100644 --- a/src/search.h +++ b/src/search.h @@ -91,15 +91,16 @@ struct RootMove { return m.score != score ? m.score < score : m.previousScore < previousScore; } - uint64_t effort = 0; - Value score = -VALUE_INFINITE; - Value previousScore = -VALUE_INFINITE; - Value averageScore = -VALUE_INFINITE; - Value uciScore = -VALUE_INFINITE; - bool scoreLowerbound = false; - bool scoreUpperbound = false; - int selDepth = 0; - int tbRank = 0; + uint64_t effort = 0; + Value score = -VALUE_INFINITE; + Value previousScore = -VALUE_INFINITE; + Value averageScore = -VALUE_INFINITE; + Value meanSquaredScore = -VALUE_INFINITE * VALUE_INFINITE; + Value uciScore = -VALUE_INFINITE; + bool scoreLowerbound = false; + bool scoreUpperbound = false; + int selDepth = 0; + int tbRank = 0; Value tbScore; std::vector pv; }; From b261df970d5207069a06e89b48983aece1c60925 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Wed, 9 Oct 2024 20:16:14 -0700 Subject: [PATCH 272/315] Fix majorPieceKey Updates Passed STC: LLR: 2.98 (-2.94,2.94) <0.00,2.00> Total: 476160 W: 124285 L: 123311 D: 228564 Ptnml(0-2): 1662, 56266, 121219, 57302, 1631 https://tests.stockfishchess.org/tests/view/66ea3dc186d5ee47d953ae07 Failed Yellow LTC: LLR: -2.94 (-2.94,2.94) <0.50,2.50> Total: 230634 W: 58525 L: 58295 D: 113814 Ptnml(0-2): 113, 25301, 64299, 25451, 153 https://tests.stockfishchess.org/tests/view/66f1825e86d5ee47d953b2ec Passed Non-regression LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 112344 W: 28590 L: 28462 D: 55292 Ptnml(0-2): 71, 12439, 31039, 12537, 86 https://tests.stockfishchess.org/tests/view/6707474486d5ee47d953bfe3 closes https://github.com/official-stockfish/Stockfish/pull/5629 Bench: 1283457 --- src/position.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/position.cpp b/src/position.cpp index f596b015..bab7a1fc 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ 
-759,7 +759,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { st->nonPawnMaterial[them] -= PieceValue[captured]; st->nonPawnKey[them] ^= Zobrist::psq[captured][capsq]; - if (type_of(pc) == QUEEN || type_of(pc) == ROOK) + if (type_of(captured) == QUEEN || type_of(captured) == ROOK) st->majorPieceKey ^= Zobrist::psq[captured][capsq]; else From 9766db8139ce8815110c15bdde8381d0564a63fa Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Sat, 12 Oct 2024 08:32:15 +0300 Subject: [PATCH 273/315] Make low ply history size fixed Size of low ply history should always be the same, so ensure it. closes https://github.com/official-stockfish/Stockfish/pull/5630 No functional change --- src/movepick.cpp | 2 +- src/movepick.h | 3 ++- src/search.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 1d1aef0f..06495189 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -179,7 +179,7 @@ void MovePicker::score() { : pt == ROOK && bool(to & threatenedByMinor) ? 
24335 : 0); - if (ply < 4) + if (ply < LOW_PLY_HISTORY_SIZE) m.value += 8 * (*lowPlyHistory)[ply][m.from_to()] / (1 + 2 * ply); } diff --git a/src/movepick.h b/src/movepick.h index 9a68aaa1..5c312531 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -37,6 +37,7 @@ namespace Stockfish { constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 constexpr int CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 constexpr int CORRECTION_HISTORY_LIMIT = 1024; +constexpr int LOW_PLY_HISTORY_SIZE = 4; static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0, "PAWN_HISTORY_SIZE has to be a power of 2"); @@ -137,7 +138,7 @@ using ButterflyHistory = Stats; +using LowPlyHistory = Stats; // CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] using CapturePieceToHistory = Stats; diff --git a/src/search.cpp b/src/search.cpp index 647bae76..6e513b45 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1851,7 +1851,7 @@ void update_quiet_histories( Color us = pos.side_to_move(); workerThread.mainHistory[us][move.from_to()] << bonus; - if (ss->ply < 4) + if (ss->ply < LOW_PLY_HISTORY_SIZE) workerThread.lowPlyHistory[ss->ply][move.from_to()] << bonus; update_continuation_histories(ss, pos.moved_piece(move), move.to_sq(), bonus); From 656b2cb6459cf3c91f8d8ed6aa770026f77ee7b9 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sun, 6 Oct 2024 20:11:19 -0400 Subject: [PATCH 274/315] Update default main net to nn-1cedc0ffeeee.nnue Created by setting output weights (256) and biases (8) of the previous main net nn-1111cefa1111.nnue to values found with spsa after 38k / 120k games at 120+1.2 using the same method as: https://github.com/official-stockfish/Stockfish/pull/5459 nn-1111cefa1111.nnue -> nn-1cedc0ffeeee.nnue # weights changed: 185 mean: 0.0703 +/- 2.53 min: -6 max: 6 Passed STC: https://tests.stockfishchess.org/tests/view/6703589b86d5ee47d953bda1 LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 101984 W: 26690 L: 26275 D: 49019 Ptnml(0-2): 
375, 11944, 25926, 12385, 362 Passed LTC: https://tests.stockfishchess.org/tests/view/670542d286d5ee47d953befa LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 106224 W: 27079 L: 26618 D: 52527 Ptnml(0-2): 71, 11508, 29487, 11981, 65 closes https://github.com/official-stockfish/Stockfish/pull/5632 Bench: 1351413 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index c9041efb..9bd436b5 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -33,7 +33,7 @@ namespace Eval { // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro or the location where this macro is defined, as it is used // in the Makefile/Fishtest. -#define EvalFileDefaultNameBig "nn-1111cefa1111.nnue" +#define EvalFileDefaultNameBig "nn-1cedc0ffeeee.nnue" #define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue" namespace NNUE { From 2f3e6198e878818f9f90de8cb31e287de34bed0e Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sun, 13 Oct 2024 13:59:20 +0300 Subject: [PATCH 275/315] Simplify optimism divisor. 
Passed STC: LLR: 2.97 (-2.94,2.94) <-1.75,0.25> Total: 139360 W: 36143 L: 36033 D: 67184 Ptnml(0-2): 436, 16456, 35778, 16582, 428 https://tests.stockfishchess.org/tests/view/66fc49c786d5ee47d953b94e Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 257748 W: 65163 L: 65184 D: 127401 Ptnml(0-2): 173, 28471, 71611, 28442, 177 https://tests.stockfishchess.org/tests/view/66ff01ae86d5ee47d953bb54 Passed LTC against rebased version: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 53610 W: 13691 L: 13501 D: 26418 Ptnml(0-2): 52, 5942, 14605, 6176, 30 https://tests.stockfishchess.org/tests/view/670a9c5c86d5ee47d953c231 closes https://github.com/official-stockfish/Stockfish/pull/5633 Bench: 1282078 --- src/evaluate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 802913a0..7c7b54a4 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -75,7 +75,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, // Blend optimism and eval with nnue complexity int nnueComplexity = std::abs(psqt - positional); - optimism += optimism * nnueComplexity / (smallNet ? 430 : 474); + optimism += optimism * nnueComplexity / 468; nnue -= nnue * nnueComplexity / (smallNet ? 20233 : 17879); int material = (smallNet ? 
553 : 532) * pos.count() + pos.non_pawn_material(); From bf2a0d53925da1a0d58a91ef78d577a448eb4b5a Mon Sep 17 00:00:00 2001 From: Taras Vuk <117687515+TarasVuk@users.noreply.github.com> Date: Mon, 14 Oct 2024 21:30:18 +0200 Subject: [PATCH 276/315] Simplify internal iterative reductions Passed STC: LLR: 2.92 (-2.94,2.94) <-1.75,0.25> Total: 138656 W: 36182 L: 36074 D: 66400 Ptnml(0-2): 523, 16422, 35310, 16570, 503 https://tests.stockfishchess.org/tests/view/6702beb386d5ee47d953bd41 Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 680844 W: 172021 L: 172480 D: 336343 Ptnml(0-2): 492, 76259, 187419, 75720, 532 https://tests.stockfishchess.org/tests/view/67042b1f86d5ee47d953be7c closes https://github.com/official-stockfish/Stockfish/pull/5634 Bench: 1169252 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 6e513b45..75a8c963 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -850,7 +850,7 @@ Value Search::Worker::search( // For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, // or by 1 if there is a ttMove with an upper bound. if (cutNode && depth >= 7 && (!ttData.move || ttData.bound == BOUND_UPPER)) - depth -= 1 + !ttData.move; + depth -= 2; // Step 11. 
ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search From 7f386d109e1b38d530d98f81e7213a2f1b2090af Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Wed, 16 Oct 2024 03:06:58 +0300 Subject: [PATCH 277/315] Remove material corrHist Passed STC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 80832 W: 21150 L: 20975 D: 38707 Ptnml(0-2): 283, 9531, 20598, 9736, 268 https://tests.stockfishchess.org/tests/view/670302fe86d5ee47d953bd68 Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 46008 W: 11621 L: 11423 D: 22964 Ptnml(0-2): 30, 5072, 12606, 5262, 34 https://tests.stockfishchess.org/tests/view/6704074686d5ee47d953be53 Passed LTC Rebased: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 95814 W: 24340 L: 24195 D: 47279 Ptnml(0-2): 71, 10497, 26602, 10690, 47 https://tests.stockfishchess.org/tests/view/670ae1ac86d5ee47d953c262 closes https://github.com/official-stockfish/Stockfish/pull/5636 Bench: 1119774 --- src/movepick.h | 4 ---- src/search.cpp | 6 +----- src/search.h | 1 - 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/movepick.h b/src/movepick.h index 5c312531..6ad13397 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -167,10 +167,6 @@ using PawnHistory = Stats using PawnCorrectionHistory = Stats; -// MaterialCorrectionHistory is addressed by color and material configuration -using MaterialCorrectionHistory = - Stats; - // MajorPieceCorrectionHistory is addressed by color and king/major piece (Queen, Rook) positions using MajorPieceCorrectionHistory = Stats; diff --git a/src/search.cpp b/src/search.cpp index 75a8c963..568e147c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -83,7 +83,6 @@ Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos, St const Color us = pos.side_to_move(); const auto m = (ss - 1)->currentMove; const auto pcv = w.pawnCorrectionHistory[us][pawn_structure_index(pos)]; - const auto mcv = w.materialCorrectionHistory[us][material_index(pos)]; const auto macv = 
w.majorPieceCorrectionHistory[us][major_piece_index(pos)]; const auto micv = w.minorPieceCorrectionHistory[us][minor_piece_index(pos)]; const auto wnpcv = w.nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)]; @@ -94,8 +93,7 @@ Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos, St cntcv = int((*(ss - 2)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()]); const auto cv = - (5932 * pcv + 2994 * mcv + 3269 * macv + 5660 * micv + 6237 * (wnpcv + bnpcv) + cntcv * 5555) - / 131072; + (5932 * pcv + 3269 * macv + 5660 * micv + 6666 * (wnpcv + bnpcv) + 5555 * cntcv) / 131072; v += cv; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } @@ -506,7 +504,6 @@ void Search::Worker::clear() { captureHistory.fill(-753); pawnHistory.fill(-1152); pawnCorrectionHistory.fill(0); - materialCorrectionHistory.fill(0); majorPieceCorrectionHistory.fill(0); minorPieceCorrectionHistory.fill(0); nonPawnCorrectionHistory[WHITE].fill(0); @@ -1428,7 +1425,6 @@ moves_loop: // When in check, search starts here -CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4); thisThread->pawnCorrectionHistory[us][pawn_structure_index(pos)] << bonus * 101 / 128; - thisThread->materialCorrectionHistory[us][material_index(pos)] << bonus * 99 / 128; thisThread->majorPieceCorrectionHistory[us][major_piece_index(pos)] << bonus * 157 / 128; thisThread->minorPieceCorrectionHistory[us][minor_piece_index(pos)] << bonus * 153 / 128; thisThread->nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)] diff --git a/src/search.h b/src/search.h index 2342d9e9..b599da11 100644 --- a/src/search.h +++ b/src/search.h @@ -287,7 +287,6 @@ class Worker { PawnHistory pawnHistory; PawnCorrectionHistory pawnCorrectionHistory; - MaterialCorrectionHistory materialCorrectionHistory; MajorPieceCorrectionHistory majorPieceCorrectionHistory; MinorPieceCorrectionHistory minorPieceCorrectionHistory; NonPawnCorrectionHistory 
nonPawnCorrectionHistory[COLOR_NB]; From b325b2c348df02e415b6c78121e0502622d57f34 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Wed, 16 Oct 2024 13:14:13 +0300 Subject: [PATCH 278/315] Simplify bestValue formula Passed STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 45888 W: 12051 L: 11841 D: 21996 Ptnml(0-2): 123, 5356, 11807, 5504, 154 https://tests.stockfishchess.org/tests/view/670bb89086d5ee47d953c2d8 Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 51336 W: 13021 L: 12830 D: 25485 Ptnml(0-2): 34, 5594, 14227, 5773, 40 https://tests.stockfishchess.org/tests/view/670c587f86d5ee47d953c31b closes https://github.com/official-stockfish/Stockfish/pull/5637 Bench: 1192999 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 568e147c..c398b7d2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1551,7 +1551,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) if (bestValue >= beta) { if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY) - bestValue = (3 * bestValue + beta) / 4; + bestValue = (bestValue + beta) / 2; if (!ss->ttHit) ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_UNSEARCHED, Move::none(), unadjustedStaticEval, From 2ce47573b4d3664dca4cbc4354c8c600540d16ad Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Wed, 16 Oct 2024 19:40:49 +0300 Subject: [PATCH 279/315] Remove -stat_malus(newDepth) Passed STC: LLR: 2.97 (-2.94,2.94) <-1.75,0.25> Total: 92544 W: 23940 L: 23778 D: 44826 Ptnml(0-2): 286, 10936, 23638, 11154, 258 https://tests.stockfishchess.org/tests/view/670c3d6986d5ee47d953c30b Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 43164 W: 10986 L: 10786 D: 21392 Ptnml(0-2): 27, 4713, 11905, 4907, 30 https://tests.stockfishchess.org/tests/view/670eda3d86d5ee47d953c51d closes https://github.com/official-stockfish/Stockfish/pull/5639 Bench: 1281912 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/search.cpp b/src/search.cpp index c398b7d2..c78acb6c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1210,7 +1210,7 @@ moves_loop: // When in check, search starts here value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth, !cutNode); // Post LMR continuation history updates (~1 Elo) - int bonus = value >= beta ? 3 * stat_bonus(newDepth) : -stat_malus(newDepth); + int bonus = 2 * (value >= beta) * stat_bonus(newDepth); update_continuation_histories(ss, movedPiece, move.to_sq(), bonus); } } From c15113554f53890d7944c00a70d0f2d8a78916fb Mon Sep 17 00:00:00 2001 From: Disservin Date: Sat, 19 Oct 2024 16:56:02 +0200 Subject: [PATCH 280/315] Speedup Makefile on Windows The Makefile is notoriously slow on Windows, because of new processes being spawned, I believe. This PR improves it a little bit for the help and config-sanity targets, with the latter also improving `make -j build` because it depends on that. On the same machine Ubuntu (WSL) is more than 3 times faster; if there are other improvements we can make I'd be happy to hear about them. Ultimately https://github.com/official-stockfish/Stockfish/pull/5543 also aims to improve this I believe, but it will take some additional time before that lands. 
``` make config-sanity: patch: 6.199s master: 12.738s make help: patch: 3.1s master: 11.49s make -j build: patch: 36s master: 43.25s make -j build: master ubuntu: 10s ``` closes https://github.com/official-stockfish/Stockfish/pull/5642 No functional change --- src/Makefile | 264 ++++++++++++++++++++++++++------------------------- 1 file changed, 133 insertions(+), 131 deletions(-) diff --git a/src/Makefile b/src/Makefile index 15066781..4307b7c7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -851,75 +851,75 @@ endif ### ========================================================================== help: - @echo "" - @echo "To compile stockfish, type: " - @echo "" - @echo "make -j target [ARCH=arch] [COMP=compiler] [COMPCXX=cxx]" - @echo "" - @echo "Supported targets:" - @echo "" - @echo "help > Display architecture details" - @echo "profile-build > standard build with profile-guided optimization" - @echo "build > skip profile-guided optimization" - @echo "net > Download the default nnue nets" - @echo "strip > Strip executable" - @echo "install > Install executable" - @echo "clean > Clean up" - @echo "" - @echo "Supported archs:" - @echo "" - @echo "native > select the best architecture for the host processor (default)" - @echo "x86-64-vnni512 > x86 64-bit with vnni 512bit support" - @echo "x86-64-vnni256 > x86 64-bit with vnni 512bit support, limit operands to 256bit wide" - @echo "x86-64-avx512 > x86 64-bit with avx512 support" - @echo "x86-64-avxvnni > x86 64-bit with vnni 256bit support" - @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" - @echo "x86-64-avx2 > x86 64-bit with avx2 support" - @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" - @echo "x86-64-modern > deprecated, currently x86-64-sse41-popcnt" - @echo "x86-64-ssse3 > x86 64-bit with ssse3 support" - @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 compile and popcnt support" - @echo "x86-64 > x86 64-bit generic (with sse2 support)" - @echo "x86-32-sse41-popcnt > x86 32-bit with 
sse41 and popcnt support" - @echo "x86-32-sse2 > x86 32-bit with sse2 support" - @echo "x86-32 > x86 32-bit generic (with mmx compile support)" - @echo "ppc-64 > PPC 64-bit" - @echo "ppc-64-altivec > PPC 64-bit with altivec support" - @echo "ppc-64-vsx > PPC 64-bit with vsx support" - @echo "ppc-32 > PPC 32-bit" - @echo "armv7 > ARMv7 32-bit" - @echo "armv7-neon > ARMv7 32-bit with popcnt and neon" - @echo "armv8 > ARMv8 64-bit with popcnt and neon" - @echo "armv8-dotprod > ARMv8 64-bit with popcnt, neon and dot product support" - @echo "e2k > Elbrus 2000" - @echo "apple-silicon > Apple silicon ARM64" - @echo "general-64 > unspecified 64-bit" - @echo "general-32 > unspecified 32-bit" - @echo "riscv64 > RISC-V 64-bit" - @echo "loongarch64 > LoongArch 64-bit" - @echo "loongarch64-lsx > LoongArch 64-bit with SIMD eXtension" - @echo "loongarch64-lasx > LoongArch 64-bit with Advanced SIMD eXtension" - @echo "" - @echo "Supported compilers:" - @echo "" - @echo "gcc > GNU compiler (default)" - @echo "mingw > GNU compiler with MinGW under Windows" - @echo "clang > LLVM Clang compiler" - @echo "icx > Intel oneAPI DPC++/C++ Compiler" - @echo "ndk > Google NDK to cross-compile for Android" - @echo "" - @echo "Simple examples. 
If you don't know what to do, you likely want to run one of: " - @echo "" - @echo "make -j profile-build ARCH=x86-64-avx2 # typically a fast compile for common systems " - @echo "make -j profile-build ARCH=x86-64-sse41-popcnt # A more portable compile for 64-bit systems " - @echo "make -j profile-build ARCH=x86-64 # A portable compile for 64-bit systems " - @echo "" - @echo "Advanced examples, for experienced users: " - @echo "" - @echo "make -j profile-build ARCH=x86-64-avxvnni" - @echo "make -j profile-build ARCH=x86-64-avxvnni COMP=gcc COMPCXX=g++-12.0" - @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" - @echo "" + @echo "" && \ + echo "To compile stockfish, type: " && \ + echo "" && \ + echo "make -j target [ARCH=arch] [COMP=compiler] [COMPCXX=cxx]" && \ + echo "" && \ + echo "Supported targets:" && \ + echo "" && \ + echo "help > Display architecture details" && \ + echo "profile-build > standard build with profile-guided optimization" && \ + echo "build > skip profile-guided optimization" && \ + echo "net > Download the default nnue nets" && \ + echo "strip > Strip executable" && \ + echo "install > Install executable" && \ + echo "clean > Clean up" && \ + echo "" && \ + echo "Supported archs:" && \ + echo "" && \ + echo "native > select the best architecture for the host processor (default)" && \ + echo "x86-64-vnni512 > x86 64-bit with vnni 512bit support" && \ + echo "x86-64-vnni256 > x86 64-bit with vnni 512bit support, limit operands to 256bit wide" && \ + echo "x86-64-avx512 > x86 64-bit with avx512 support" && \ + echo "x86-64-avxvnni > x86 64-bit with vnni 256bit support" && \ + echo "x86-64-bmi2 > x86 64-bit with bmi2 support" && \ + echo "x86-64-avx2 > x86 64-bit with avx2 support" && \ + echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" && \ + echo "x86-64-modern > deprecated, currently x86-64-sse41-popcnt" && \ + echo "x86-64-ssse3 > x86 64-bit with ssse3 support" && \ + echo "x86-64-sse3-popcnt > x86 64-bit with sse3 
compile and popcnt support" && \ + echo "x86-64 > x86 64-bit generic (with sse2 support)" && \ + echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" && \ + echo "x86-32-sse2 > x86 32-bit with sse2 support" && \ + echo "x86-32 > x86 32-bit generic (with mmx compile support)" && \ + echo "ppc-64 > PPC 64-bit" && \ + echo "ppc-64-altivec > PPC 64-bit with altivec support" && \ + echo "ppc-64-vsx > PPC 64-bit with vsx support" && \ + echo "ppc-32 > PPC 32-bit" && \ + echo "armv7 > ARMv7 32-bit" && \ + echo "armv7-neon > ARMv7 32-bit with popcnt and neon" && \ + echo "armv8 > ARMv8 64-bit with popcnt and neon" && \ + echo "armv8-dotprod > ARMv8 64-bit with popcnt, neon and dot product support" && \ + echo "e2k > Elbrus 2000" && \ + echo "apple-silicon > Apple silicon ARM64" && \ + echo "general-64 > unspecified 64-bit" && \ + echo "general-32 > unspecified 32-bit" && \ + echo "riscv64 > RISC-V 64-bit" && \ + echo "loongarch64 > LoongArch 64-bit" && \ + echo "loongarch64-lsx > LoongArch 64-bit with SIMD eXtension" && \ + echo "loongarch64-lasx > LoongArch 64-bit with Advanced SIMD eXtension" && \ + echo "" && \ + echo "Supported compilers:" && \ + echo "" && \ + echo "gcc > GNU compiler (default)" && \ + echo "mingw > GNU compiler with MinGW under Windows" && \ + echo "clang > LLVM Clang compiler" && \ + echo "icx > Intel oneAPI DPC++/C++ Compiler" && \ + echo "ndk > Google NDK to cross-compile for Android" && \ + echo "" && \ + echo "Simple examples. 
If you don't know what to do, you likely want to run one of: " && \ + echo "" && \ + echo "make -j profile-build ARCH=x86-64-avx2 # typically a fast compile for common systems " && \ + echo "make -j profile-build ARCH=x86-64-sse41-popcnt # A more portable compile for 64-bit systems " && \ + echo "make -j profile-build ARCH=x86-64 # A portable compile for 64-bit systems " && \ + echo "" && \ + echo "Advanced examples, for experienced users: " && \ + echo "" && \ + echo "make -j profile-build ARCH=x86-64-avxvnni" && \ + echo "make -j profile-build ARCH=x86-64-avxvnni COMP=gcc COMPCXX=g++-12.0" && \ + echo "make -j build ARCH=x86-64-ssse3 COMP=clang" && \ + echo "" ifneq ($(SUPPORTED_ARCH), true) @echo "Specify a supported architecture with the ARCH option for more details" @echo "" @@ -1000,69 +1000,71 @@ all: $(EXE) .depend config-sanity: net @echo "" - @echo "Config:" - @echo "debug: '$(debug)'" - @echo "sanitize: '$(sanitize)'" - @echo "optimize: '$(optimize)'" - @echo "arch: '$(arch)'" - @echo "bits: '$(bits)'" - @echo "kernel: '$(KERNEL)'" - @echo "os: '$(OS)'" - @echo "prefetch: '$(prefetch)'" - @echo "popcnt: '$(popcnt)'" - @echo "pext: '$(pext)'" - @echo "sse: '$(sse)'" - @echo "mmx: '$(mmx)'" - @echo "sse2: '$(sse2)'" - @echo "ssse3: '$(ssse3)'" - @echo "sse41: '$(sse41)'" - @echo "avx2: '$(avx2)'" - @echo "avxvnni: '$(avxvnni)'" - @echo "avx512: '$(avx512)'" - @echo "vnni256: '$(vnni256)'" - @echo "vnni512: '$(vnni512)'" - @echo "altivec: '$(altivec)'" - @echo "vsx: '$(vsx)'" - @echo "neon: '$(neon)'" - @echo "dotprod: '$(dotprod)'" - @echo "arm_version: '$(arm_version)'" - @echo "lsx: '$(lsx)'" - @echo "lasx: '$(lasx)'" - @echo "target_windows: '$(target_windows)'" - @echo "" - @echo "Flags:" - @echo "CXX: $(CXX)" - @echo "CXXFLAGS: $(CXXFLAGS)" - @echo "LDFLAGS: $(LDFLAGS)" - @echo "" - @echo "Testing config sanity. If this fails, try 'make help' ..." 
- @echo "" - @test "$(debug)" = "yes" || test "$(debug)" = "no" - @test "$(optimize)" = "yes" || test "$(optimize)" = "no" - @test "$(SUPPORTED_ARCH)" = "true" - @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ + @echo "Config:" && \ + echo "debug: '$(debug)'" && \ + echo "sanitize: '$(sanitize)'" && \ + echo "optimize: '$(optimize)'" && \ + echo "arch: '$(arch)'" && \ + echo "bits: '$(bits)'" && \ + echo "kernel: '$(KERNEL)'" && \ + echo "os: '$(OS)'" && \ + echo "prefetch: '$(prefetch)'" && \ + echo "popcnt: '$(popcnt)'" && \ + echo "pext: '$(pext)'" && \ + echo "sse: '$(sse)'" && \ + echo "mmx: '$(mmx)'" && \ + echo "sse2: '$(sse2)'" && \ + echo "ssse3: '$(ssse3)'" && \ + echo "sse41: '$(sse41)'" && \ + echo "avx2: '$(avx2)'" && \ + echo "avxvnni: '$(avxvnni)'" && \ + echo "avx512: '$(avx512)'" && \ + echo "vnni256: '$(vnni256)'" && \ + echo "vnni512: '$(vnni512)'" && \ + echo "altivec: '$(altivec)'" && \ + echo "vsx: '$(vsx)'" && \ + echo "neon: '$(neon)'" && \ + echo "dotprod: '$(dotprod)'" && \ + echo "arm_version: '$(arm_version)'" && \ + echo "lsx: '$(lsx)'" && \ + echo "lasx: '$(lasx)'" && \ + echo "target_windows: '$(target_windows)'" && \ + echo "" && \ + echo "Flags:" && \ + echo "CXX: $(CXX)" && \ + echo "CXXFLAGS: $(CXXFLAGS)" && \ + echo "LDFLAGS: $(LDFLAGS)" && \ + echo "" && \ + echo "Testing config sanity. If this fails, try 'make help' ..." 
&& \ + echo "" && \ + (test "$(debug)" = "yes" || test "$(debug)" = "no") && \ + (test "$(optimize)" = "yes" || test "$(optimize)" = "no") && \ + (test "$(SUPPORTED_ARCH)" = "true") && \ + (test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \ - test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" || test "$(arch)" = "riscv64" || test "$(arch)" = "loongarch64" - @test "$(bits)" = "32" || test "$(bits)" = "64" - @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" - @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" - @test "$(pext)" = "yes" || test "$(pext)" = "no" - @test "$(sse)" = "yes" || test "$(sse)" = "no" - @test "$(mmx)" = "yes" || test "$(mmx)" = "no" - @test "$(sse2)" = "yes" || test "$(sse2)" = "no" - @test "$(ssse3)" = "yes" || test "$(ssse3)" = "no" - @test "$(sse41)" = "yes" || test "$(sse41)" = "no" - @test "$(avx2)" = "yes" || test "$(avx2)" = "no" - @test "$(avx512)" = "yes" || test "$(avx512)" = "no" - @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" - @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" - @test "$(altivec)" = "yes" || test "$(altivec)" = "no" - @test "$(vsx)" = "yes" || test "$(vsx)" = "no" - @test "$(neon)" = "yes" || test "$(neon)" = "no" - @test "$(lsx)" = "yes" || test "$(lsx)" = "no" - @test "$(lasx)" = "yes" || test "$(lasx)" = "no" - @test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ - || test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" + test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" || \ + test "$(arch)" = "riscv64" || test "$(arch)" = "loongarch64") && \ + (test "$(bits)" = "32" || test "$(bits)" = "64") && \ + (test "$(prefetch)" = "yes" || test "$(prefetch)" = "no") && \ + (test "$(popcnt)" = "yes" || test "$(popcnt)" = 
"no") && \ + (test "$(pext)" = "yes" || test "$(pext)" = "no") && \ + (test "$(sse)" = "yes" || test "$(sse)" = "no") && \ + (test "$(mmx)" = "yes" || test "$(mmx)" = "no") && \ + (test "$(sse2)" = "yes" || test "$(sse2)" = "no") && \ + (test "$(ssse3)" = "yes" || test "$(ssse3)" = "no") && \ + (test "$(sse41)" = "yes" || test "$(sse41)" = "no") && \ + (test "$(avx2)" = "yes" || test "$(avx2)" = "no") && \ + (test "$(avx512)" = "yes" || test "$(avx512)" = "no") && \ + (test "$(vnni256)" = "yes" || test "$(vnni256)" = "no") && \ + (test "$(vnni512)" = "yes" || test "$(vnni512)" = "no") && \ + (test "$(altivec)" = "yes" || test "$(altivec)" = "no") && \ + (test "$(vsx)" = "yes" || test "$(vsx)" = "no") && \ + (test "$(neon)" = "yes" || test "$(neon)" = "no") && \ + (test "$(lsx)" = "yes" || test "$(lsx)" = "no") && \ + (test "$(lasx)" = "yes" || test "$(lasx)" = "no") && \ + (test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || \ + test "$(comp)" = "clang" || test "$(comp)" = "armv7a-linux-androideabi16-clang" || \ + test "$(comp)" = "aarch64-linux-android21-clang") $(EXE): $(OBJS) +$(CXX) -o $@ $(OBJS) $(LDFLAGS) From 8ef403c7869b2d3b7e480cedae97e97d3b271f56 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Mon, 21 Oct 2024 10:42:31 +0300 Subject: [PATCH 281/315] Small cleanup for stats adjustments After some simplifications bonuses and maluses are the same for quiet and non-quiet moves so it makes no sense to use quietMoveBonus/Malus, instead use just bonus/malus. 
closes https://github.com/official-stockfish/Stockfish/pull/5649 No functional change --- src/search.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c78acb6c..8a7bd810 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1791,35 +1791,35 @@ void update_all_stats(const Position& pos, Piece moved_piece = pos.moved_piece(bestMove); PieceType captured; - int quietMoveBonus = stat_bonus(depth); - int quietMoveMalus = stat_malus(depth); + int bonus = stat_bonus(depth); + int malus = stat_malus(depth); if (!pos.capture_stage(bestMove)) { - update_quiet_histories(pos, ss, workerThread, bestMove, quietMoveBonus); + update_quiet_histories(pos, ss, workerThread, bestMove, bonus); // Decrease stats for all non-best quiet moves for (Move move : quietsSearched) - update_quiet_histories(pos, ss, workerThread, move, -quietMoveMalus); + update_quiet_histories(pos, ss, workerThread, move, -malus); } else { // Increase stats for the best move in case it was a capture move captured = type_of(pos.piece_on(bestMove.to_sq())); - captureHistory[moved_piece][bestMove.to_sq()][captured] << quietMoveBonus; + captureHistory[moved_piece][bestMove.to_sq()][captured] << bonus; } // Extra penalty for a quiet early move that was not a TT move in // previous ply when it gets refuted. 
if (prevSq != SQ_NONE && ((ss - 1)->moveCount == 1 + (ss - 1)->ttHit) && !pos.captured_piece()) - update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -quietMoveMalus); + update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -malus); // Decrease stats for all non-best capture moves for (Move move : capturesSearched) { moved_piece = pos.moved_piece(move); captured = type_of(pos.piece_on(move.to_sq())); - captureHistory[moved_piece][move.to_sq()][captured] << -quietMoveMalus; + captureHistory[moved_piece][move.to_sq()][captured] << -malus; } } From 4a9c980f3bb666648054a9710ec0346561229312 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Tue, 22 Oct 2024 14:57:07 -0700 Subject: [PATCH 282/315] Template Corrhist Avoids duplication of `using ... = Stats;` closes https://github.com/official-stockfish/Stockfish/pull/5650 No functional change Co-authored-by: Disservin --- src/movepick.h | 41 ++++++++++++++++++++++------------------- src/search.h | 36 ++++++++++++++++++------------------ 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/src/movepick.h b/src/movepick.h index 6ad13397..dff09f79 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -146,9 +146,6 @@ using CapturePieceToHistory = Stats; -// PieceToCorrectionHistory is addressed by a move's [piece][to] -using PieceToCorrectionHistory = Stats; - // ContinuationHistory is the combined history of a given pair of moves, usually // the current one given a previous one. The nested history table is based on // PieceToHistory instead of ButterflyBoards. @@ -162,26 +159,32 @@ using PawnHistory = Stats // positions and their search score. It is used to improve the static evaluation // used by some search heuristics. 
// see https://www.chessprogramming.org/Static_Evaluation_Correction_History +enum CorrHistType { + Pawn, // By color and pawn structure + Major, // By color and positions of major pieces (Queen, Rook) and King + Minor, // By color and positions of minor pieces (Knight, Bishop) and King + NonPawn, // By color and non-pawn material positions + PieceTo, // By [piece][to] move + Continuation, // Combined history of move pairs +}; -// PawnCorrectionHistory is addressed by color and pawn structure -using PawnCorrectionHistory = - Stats; +template +struct CorrHistTypedef { + using type = Stats; +}; -// MajorPieceCorrectionHistory is addressed by color and king/major piece (Queen, Rook) positions -using MajorPieceCorrectionHistory = - Stats; +template<> +struct CorrHistTypedef { + using type = Stats; +}; -// MinorPieceCorrectionHistory is addressed by color and king/minor piece (Knight, Bishop) positions -using MinorPieceCorrectionHistory = - Stats; +template<> +struct CorrHistTypedef { + using type = Stats::type, NOT_USED, PIECE_NB, SQUARE_NB>; +}; -// NonPawnCorrectionHistory is addressed by color and non-pawn material positions -using NonPawnCorrectionHistory = - Stats; - -// ContinuationCorrectionHistory is the combined correction history of a given pair of moves -using ContinuationCorrectionHistory = - Stats; +template +using CorrectionHistory = typename CorrHistTypedef::type; // The MovePicker class is used to pick one pseudo-legal move at a time from the // current position. The most important method is next_move(), which emits one diff --git a/src/search.h b/src/search.h index b599da11..751a3984 100644 --- a/src/search.h +++ b/src/search.h @@ -61,19 +61,19 @@ namespace Search { // shallower and deeper in the tree during the search. Each search thread has // its own array of Stack objects, indexed by the current ply. 
struct Stack { - Move* pv; - PieceToHistory* continuationHistory; - PieceToCorrectionHistory* continuationCorrectionHistory; - int ply; - Move currentMove; - Move excludedMove; - Value staticEval; - int statScore; - int moveCount; - bool inCheck; - bool ttPv; - bool ttHit; - int cutoffCnt; + Move* pv; + PieceToHistory* continuationHistory; + CorrectionHistory* continuationCorrectionHistory; + int ply; + Move currentMove; + Move excludedMove; + Value staticEval; + int statScore; + int moveCount; + bool inCheck; + bool ttPv; + bool ttHit; + int cutoffCnt; }; @@ -286,11 +286,11 @@ class Worker { ContinuationHistory continuationHistory[2][2]; PawnHistory pawnHistory; - PawnCorrectionHistory pawnCorrectionHistory; - MajorPieceCorrectionHistory majorPieceCorrectionHistory; - MinorPieceCorrectionHistory minorPieceCorrectionHistory; - NonPawnCorrectionHistory nonPawnCorrectionHistory[COLOR_NB]; - ContinuationCorrectionHistory continuationCorrectionHistory; + CorrectionHistory pawnCorrectionHistory; + CorrectionHistory majorPieceCorrectionHistory; + CorrectionHistory minorPieceCorrectionHistory; + CorrectionHistory nonPawnCorrectionHistory[COLOR_NB]; + CorrectionHistory continuationCorrectionHistory; private: void iterative_deepening(); From 8681d3c2b38096120829c2fb47acaeb32b2fbf8b Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Fri, 25 Oct 2024 15:28:10 +0300 Subject: [PATCH 283/315] Simplify Time Management Formula Decreasing the number of operations Passed STC: LLR: 2.97 (-2.94,2.94) <-1.75,0.25> Total: 38880 W: 10038 L: 9823 D: 19019 Ptnml(0-2): 92, 4334, 10395, 4505, 114 https://tests.stockfishchess.org/tests/view/67112bf586d5ee47d953c6be Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 242844 W: 61425 L: 61431 D: 119988 Ptnml(0-2): 145, 25175, 70797, 25151, 154 https://tests.stockfishchess.org/tests/view/6712387486d5ee47d953c737 closes https://github.com/official-stockfish/Stockfish/pull/5655 Bench: 1281912 --- src/search.cpp | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 8a7bd810..c7a8c28b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -443,9 +443,9 @@ void Search::Worker::iterative_deepening() { { int nodesEffort = rootMoves[0].effort * 100 / std::max(size_t(1), size_t(nodes)); - double fallingEval = (1067 + 223 * (mainThread->bestPreviousAverageScore - bestValue) - + 97 * (mainThread->iterValue[iterIdx] - bestValue)) - / 10000.0; + double fallingEval = (11 + 2 * (mainThread->bestPreviousAverageScore - bestValue) + + (mainThread->iterValue[iterIdx] - bestValue)) + / 100.0; fallingEval = std::clamp(fallingEval, 0.580, 1.667); // If the bestMove is stable over several iterations, reduce time accordingly From 24c57793e1917b2110d1e3ce8edc634f43eadc67 Mon Sep 17 00:00:00 2001 From: MinetaS Date: Wed, 30 Oct 2024 21:30:21 +0900 Subject: [PATCH 284/315] Remove moveCountPruning in search.cpp The definition of moveCountPruning may cause confusion by implying that the variable is unconstrained. However, once it is set to true, it should not be reset to false, otherwise it would break the internal logic of MovePicker. Several patches have overlooked this constraint. For example: https://tests.stockfishchess.org/tests/view/671e7c0486d5ee47d953d226 https://tests.stockfishchess.org/tests/view/66a1de7b4ff211be9d4eccea The implementation approach was suggested by Disservin. 
Passed non-regression STC: LLR: 3.02 (-2.94,2.94) <-1.75,0.25> Total: 180672 W: 47072 L: 47006 D: 86594 Ptnml(0-2): 536, 19482, 50247, 19522, 549 https://tests.stockfishchess.org/tests/view/6720df6f86d5ee47d953d542 closes https://github.com/official-stockfish/Stockfish/pull/5661 No functional change --- src/movepick.cpp | 4 +++- src/movepick.h | 4 +++- src/search.cpp | 8 ++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 06495189..2a1fb837 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -216,7 +216,7 @@ Move MovePicker::select(Pred filter) { // This is the most important method of the MovePicker class. We emit one // new pseudo-legal move on every call until there are no more moves left, // picking the move with the highest score from a list of generated moves. -Move MovePicker::next_move(bool skipQuiets) { +Move MovePicker::next_move() { auto quiet_threshold = [](Depth d) { return -3560 * d; }; @@ -322,4 +322,6 @@ top: return Move::none(); // Silence warning } +void MovePicker::skip_quiet_moves() { skipQuiets = true; } + } // namespace Stockfish diff --git a/src/movepick.h b/src/movepick.h index dff09f79..f8f84d02 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -212,7 +212,8 @@ class MovePicker { const PawnHistory*, int); MovePicker(const Position&, Move, int, const CapturePieceToHistory*); - Move next_move(bool skipQuiets = false); + Move next_move(); + void skip_quiet_moves(); private: template @@ -234,6 +235,7 @@ class MovePicker { int threshold; Depth depth; int ply; + bool skipQuiets = false; ExtMove moves[MAX_MOVES]; }; diff --git a/src/search.cpp b/src/search.cpp index c7a8c28b..4864057c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -942,12 +942,11 @@ moves_loop: // When in check, search starts here value = bestValue; - int moveCount = 0; - bool moveCountPruning = false; + int moveCount = 0; // Step 13. 
Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. - while ((move = mp.next_move(moveCountPruning)) != Move::none()) + while ((move = mp.next_move()) != Move::none()) { assert(move.is_ok()); @@ -993,7 +992,8 @@ moves_loop: // When in check, search starts here if (!rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) - moveCountPruning = moveCount >= futility_move_count(improving, depth); + if (moveCount >= futility_move_count(improving, depth)) + mp.skip_quiet_moves(); // Reduced depth of the next LMR search int lmrDepth = newDepth - r; From 16fee2a7da25c6d0267930eb9677862cb1f009c7 Mon Sep 17 00:00:00 2001 From: mstembera Date: Wed, 23 Oct 2024 03:37:32 -0700 Subject: [PATCH 285/315] Cleanup TT::hashfull() closes https://github.com/official-stockfish/Stockfish/pull/5651 No functional change --- src/tt.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/tt.cpp b/src/tt.cpp index 50750753..75689562 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -194,19 +194,12 @@ void TranspositionTable::clear(ThreadPool& threads) { // occupation during a search. The hash is x permill full, as per UCI protocol. // Only counts entries which match the current generation. 
int TranspositionTable::hashfull(int maxAge) const { - int cnt = 0; + int maxAgeInternal = maxAge << GENERATION_BITS; + int cnt = 0; for (int i = 0; i < 1000; ++i) for (int j = 0; j < ClusterSize; ++j) - { - if (table[i].entry[j].is_occupied()) - { - int age = (generation8 >> GENERATION_BITS) - - ((table[i].entry[j].genBound8 & GENERATION_MASK) >> GENERATION_BITS); - if (age < 0) - age += 1 << (8 - GENERATION_BITS); - cnt += age <= maxAge; - } - } + cnt += table[i].entry[j].is_occupied() + && table[i].entry[j].relative_age(generation8) <= maxAgeInternal; return cnt / ClusterSize; } From c2611efe5c317969b583a5ff81352439f905e722 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Thu, 24 Oct 2024 13:16:49 -0700 Subject: [PATCH 286/315] Move history code to a separate header Since no correction histories are ever used inside Movepick, and many existing histories are closely integrated into search, it might be more logical to separate them into their own header. PR based on #5650 closes https://github.com/official-stockfish/Stockfish/pull/5652 No functional change --- src/Makefile | 2 +- src/history.h | 185 +++++++++++++++++++++++++++++++++++++++++++++++ src/movepick.cpp | 2 + src/movepick.h | 163 +---------------------------------------- src/search.cpp | 1 + src/search.h | 2 +- 6 files changed, 192 insertions(+), 163 deletions(-) create mode 100644 src/history.h diff --git a/src/Makefile b/src/Makefile index 4307b7c7..e7f8ce55 100644 --- a/src/Makefile +++ b/src/Makefile @@ -57,7 +57,7 @@ SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp memory.cpp -HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \ +HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h history.h \ nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \ 
nnue/layers/affine_transform_sparse_input.h nnue/layers/clipped_relu.h nnue/layers/simd.h \ nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \ diff --git a/src/history.h b/src/history.h new file mode 100644 index 00000000..8d14a7a7 --- /dev/null +++ b/src/history.h @@ -0,0 +1,185 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef HISTORY_H_INCLUDED +#define HISTORY_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include // IWYU pragma: keep + +#include "position.h" + +namespace Stockfish { + +constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 +constexpr int CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 +constexpr int CORRECTION_HISTORY_LIMIT = 1024; +constexpr int LOW_PLY_HISTORY_SIZE = 4; + +static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0, + "PAWN_HISTORY_SIZE has to be a power of 2"); + +static_assert((CORRECTION_HISTORY_SIZE & (CORRECTION_HISTORY_SIZE - 1)) == 0, + "CORRECTION_HISTORY_SIZE has to be a power of 2"); + +enum PawnHistoryType { + Normal, + Correction +}; + +template +inline int pawn_structure_index(const Position& pos) { + return pos.pawn_key() & ((T == Normal ? 
PAWN_HISTORY_SIZE : CORRECTION_HISTORY_SIZE) - 1); +} + +inline int major_piece_index(const Position& pos) { + return pos.major_piece_key() & (CORRECTION_HISTORY_SIZE - 1); +} + +inline int minor_piece_index(const Position& pos) { + return pos.minor_piece_key() & (CORRECTION_HISTORY_SIZE - 1); +} + +template +inline int non_pawn_index(const Position& pos) { + return pos.non_pawn_key(c) & (CORRECTION_HISTORY_SIZE - 1); +} + +// StatsEntry stores the stat table value. It is usually a number but could +// be a move or even a nested history. We use a class instead of a naked value +// to directly call history update operator<<() on the entry so to use stats +// tables at caller sites as simple multi-dim arrays. +template +class StatsEntry { + + T entry; + + public: + void operator=(const T& v) { entry = v; } + T* operator&() { return &entry; } + T* operator->() { return &entry; } + operator const T&() const { return entry; } + + void operator<<(int bonus) { + static_assert(D <= std::numeric_limits::max(), "D overflows T"); + + // Make sure that bonus is in range [-D, D] + int clampedBonus = std::clamp(bonus, -D, D); + entry += clampedBonus - entry * std::abs(clampedBonus) / D; + + assert(std::abs(entry) <= D); + } +}; + +// Stats is a generic N-dimensional array used to store various statistics. +// The first template parameter T is the base type of the array, and the second +// template parameter D limits the range of updates in [-D, D] when we update +// values with the << operator, while the last parameters (Size and Sizes) +// encode the dimensions of the array. 
+template +struct Stats: public std::array, Size> { + using stats = Stats; + + void fill(const T& v) { + + // For standard-layout 'this' points to the first struct member + assert(std::is_standard_layout_v); + + using entry = StatsEntry; + entry* p = reinterpret_cast(this); + std::fill(p, p + sizeof(*this) / sizeof(entry), v); + } +}; + +template +struct Stats: public std::array, Size> {}; + +// In stats table, D=0 means that the template parameter is not used +enum StatsParams { + NOT_USED = 0 +}; +enum StatsType { + NoCaptures, + Captures +}; + +// ButterflyHistory records how often quiet moves have been successful or unsuccessful +// during the current search, and is used for reduction and move ordering decisions. +// It uses 2 tables (one for each color) indexed by the move's from and to squares, +// see https://www.chessprogramming.org/Butterfly_Boards (~11 elo) +using ButterflyHistory = Stats; + +// LowPlyHistory is adressed by play and move's from and to squares, used +// to improve move ordering near the root +using LowPlyHistory = Stats; + +// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] +using CapturePieceToHistory = Stats; + +// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to] +using PieceToHistory = Stats; + +// ContinuationHistory is the combined history of a given pair of moves, usually +// the current one given a previous one. The nested history table is based on +// PieceToHistory instead of ButterflyBoards. +// (~63 elo) +using ContinuationHistory = Stats; + +// PawnHistory is addressed by the pawn structure and a move's [piece][to] +using PawnHistory = Stats; + +// Correction histories record differences between the static evaluation of +// positions and their search score. It is used to improve the static evaluation +// used by some search heuristics. 
+// see https://www.chessprogramming.org/Static_Evaluation_Correction_History +enum CorrHistType { + Pawn, // By color and pawn structure + Major, // By color and positions of major pieces (Queen, Rook) and King + Minor, // By color and positions of minor pieces (Knight, Bishop) and King + NonPawn, // By color and non-pawn material positions + PieceTo, // By [piece][to] move + Continuation, // Combined history of move pairs +}; + +template +struct CorrHistTypedef { + using type = Stats; +}; + +template<> +struct CorrHistTypedef { + using type = Stats; +}; + +template<> +struct CorrHistTypedef { + using type = Stats::type, NOT_USED, PIECE_NB, SQUARE_NB>; +}; + +template +using CorrectionHistory = typename CorrHistTypedef::type; + +} // namespace Stockfish + +#endif // #ifndef HISTORY_H_INCLUDED diff --git a/src/movepick.cpp b/src/movepick.cpp index 2a1fb837..720f2e03 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -19,7 +19,9 @@ #include "movepick.h" #include +#include #include +#include #include #include "bitboard.h" diff --git a/src/movepick.h b/src/movepick.h index f8f84d02..0278b70e 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -19,172 +19,13 @@ #ifndef MOVEPICK_H_INCLUDED #define MOVEPICK_H_INCLUDED -#include -#include -#include -#include -#include -#include -#include -#include // IWYU pragma: keep - +#include "history.h" #include "movegen.h" -#include "position.h" #include "types.h" namespace Stockfish { -constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 -constexpr int CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2 -constexpr int CORRECTION_HISTORY_LIMIT = 1024; -constexpr int LOW_PLY_HISTORY_SIZE = 4; - -static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0, - "PAWN_HISTORY_SIZE has to be a power of 2"); - -static_assert((CORRECTION_HISTORY_SIZE & (CORRECTION_HISTORY_SIZE - 1)) == 0, - "CORRECTION_HISTORY_SIZE has to be a power of 2"); - -enum PawnHistoryType { - Normal, - Correction -}; - -template -inline 
int pawn_structure_index(const Position& pos) { - return pos.pawn_key() & ((T == Normal ? PAWN_HISTORY_SIZE : CORRECTION_HISTORY_SIZE) - 1); -} - -inline int material_index(const Position& pos) { - return pos.material_key() & (CORRECTION_HISTORY_SIZE - 1); -} - -inline int major_piece_index(const Position& pos) { - return pos.major_piece_key() & (CORRECTION_HISTORY_SIZE - 1); -} - -inline int minor_piece_index(const Position& pos) { - return pos.minor_piece_key() & (CORRECTION_HISTORY_SIZE - 1); -} - -template -inline int non_pawn_index(const Position& pos) { - return pos.non_pawn_key(c) & (CORRECTION_HISTORY_SIZE - 1); -} - -// StatsEntry stores the stat table value. It is usually a number but could -// be a move or even a nested history. We use a class instead of a naked value -// to directly call history update operator<<() on the entry so to use stats -// tables at caller sites as simple multi-dim arrays. -template -class StatsEntry { - - T entry; - - public: - void operator=(const T& v) { entry = v; } - T* operator&() { return &entry; } - T* operator->() { return &entry; } - operator const T&() const { return entry; } - - void operator<<(int bonus) { - static_assert(D <= std::numeric_limits::max(), "D overflows T"); - - // Make sure that bonus is in range [-D, D] - int clampedBonus = std::clamp(bonus, -D, D); - entry += clampedBonus - entry * std::abs(clampedBonus) / D; - - assert(std::abs(entry) <= D); - } -}; - -// Stats is a generic N-dimensional array used to store various statistics. -// The first template parameter T is the base type of the array, and the second -// template parameter D limits the range of updates in [-D, D] when we update -// values with the << operator, while the last parameters (Size and Sizes) -// encode the dimensions of the array. 
-template -struct Stats: public std::array, Size> { - using stats = Stats; - - void fill(const T& v) { - - // For standard-layout 'this' points to the first struct member - assert(std::is_standard_layout_v); - - using entry = StatsEntry; - entry* p = reinterpret_cast(this); - std::fill(p, p + sizeof(*this) / sizeof(entry), v); - } -}; - -template -struct Stats: public std::array, Size> {}; - -// In stats table, D=0 means that the template parameter is not used -enum StatsParams { - NOT_USED = 0 -}; -enum StatsType { - NoCaptures, - Captures -}; - -// ButterflyHistory records how often quiet moves have been successful or unsuccessful -// during the current search, and is used for reduction and move ordering decisions. -// It uses 2 tables (one for each color) indexed by the move's from and to squares, -// see https://www.chessprogramming.org/Butterfly_Boards (~11 elo) -using ButterflyHistory = Stats; - -// LowPlyHistory is adressed by play and move's from and to squares, used -// to improve move ordering near the root -using LowPlyHistory = Stats; - -// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] -using CapturePieceToHistory = Stats; - -// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to] -using PieceToHistory = Stats; - -// ContinuationHistory is the combined history of a given pair of moves, usually -// the current one given a previous one. The nested history table is based on -// PieceToHistory instead of ButterflyBoards. -// (~63 elo) -using ContinuationHistory = Stats; - -// PawnHistory is addressed by the pawn structure and a move's [piece][to] -using PawnHistory = Stats; - -// Correction histories record differences between the static evaluation of -// positions and their search score. It is used to improve the static evaluation -// used by some search heuristics. 
-// see https://www.chessprogramming.org/Static_Evaluation_Correction_History -enum CorrHistType { - Pawn, // By color and pawn structure - Major, // By color and positions of major pieces (Queen, Rook) and King - Minor, // By color and positions of minor pieces (Knight, Bishop) and King - NonPawn, // By color and non-pawn material positions - PieceTo, // By [piece][to] move - Continuation, // Combined history of move pairs -}; - -template -struct CorrHistTypedef { - using type = Stats; -}; - -template<> -struct CorrHistTypedef { - using type = Stats; -}; - -template<> -struct CorrHistTypedef { - using type = Stats::type, NOT_USED, PIECE_NB, SQUARE_NB>; -}; - -template -using CorrectionHistory = typename CorrHistTypedef::type; +class Position; // The MovePicker class is used to pick one pseudo-legal move at a time from the // current position. The most important method is next_move(), which emits one diff --git a/src/search.cpp b/src/search.cpp index 4864057c..d6914da0 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -34,6 +34,7 @@ #include #include "evaluate.h" +#include "history.h" #include "misc.h" #include "movegen.h" #include "movepick.h" diff --git a/src/search.h b/src/search.h index 751a3984..b618855b 100644 --- a/src/search.h +++ b/src/search.h @@ -31,8 +31,8 @@ #include #include +#include "history.h" #include "misc.h" -#include "movepick.h" #include "nnue/network.h" #include "nnue/nnue_accumulator.h" #include "numa.h" From ecf5646f6e8446a3498aca04723dcee2b74f2d77 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Fri, 1 Nov 2024 02:04:35 +0300 Subject: [PATCH 287/315] Refine definition of improving This patch also allows improving flag to be true if static evaluation of the position is good enough. 
Passed STC: https://tests.stockfishchess.org/tests/view/6720906086d5ee47d953d4d0 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 34816 W: 9172 L: 8858 D: 16786 Ptnml(0-2): 113, 3988, 8887, 4312, 108 Passed LTC: https://tests.stockfishchess.org/tests/view/6721162686d5ee47d953d597 LLR: 2.96 (-2.94,2.94) <0.50,2.50> Total: 145374 W: 37118 L: 36574 D: 71682 Ptnml(0-2): 91, 15875, 40212, 16417, 92 closes https://github.com/official-stockfish/Stockfish/pull/5662 Bench: 1518856 --- src/search.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index d6914da0..1b9b745c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -795,6 +795,8 @@ Value Search::Worker::search( && eval < VALUE_TB_WIN_IN_MAX_PLY) return beta + (eval - beta) / 3; + improving |= ss->staticEval >= beta + 100; + // Step 9. Null move search with verification search (~35 Elo) if (cutNode && (ss - 1)->currentMove != Move::null() && eval >= beta && ss->staticEval >= beta - 23 * depth + 400 && !excludedMove && pos.non_pawn_material(us) From 54cf226604cfc9d17f432fa0b5bca56277e5561c Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Fri, 1 Nov 2024 13:54:50 +0300 Subject: [PATCH 288/315] Revert VLTC regression from #5634 https://tests.stockfishchess.org/tests/view/671bf61b86d5ee47d953cf23 And thanks to @xu-shawn for suggesting running a VLTC regression test since depth modifications affect scaling.
Also, the LTC was showing a slight regression after 680+k games (~= -0.34), for reference: https://tests.stockfishchess.org/tests/view/67042b1f86d5ee47d953be7c closes https://github.com/official-stockfish/Stockfish/pull/5663 Bench: 1307308 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 1b9b745c..5c6a62c8 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -850,7 +850,7 @@ Value Search::Worker::search( // For cutNodes, if depth is high enough, decrease depth by 2 if there is no ttMove, // or by 1 if there is a ttMove with an upper bound. if (cutNode && depth >= 7 && (!ttData.move || ttData.bound == BOUND_UPPER)) - depth -= 2; + depth -= 1 + !ttData.move; // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search From f77bac3dcab84a31238289ade55f9d85b650ac1a Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 3 Nov 2024 16:50:47 -0800 Subject: [PATCH 289/315] Remove stale Cache::clear() method closes https://github.com/official-stockfish/Stockfish/pull/5666 No functional change --- src/nnue/nnue_accumulator.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index b8dcf1e4..b92901e4 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -80,11 +80,6 @@ struct AccumulatorCaches { entry.clear(network.featureTransformer->biases); } - void clear(const BiasType* biases) { - for (auto& entry : entries) - entry.clear(biases); - } - std::array& operator[](Square sq) { return entries[sq]; } std::array, SQUARE_NB> entries; From cc5c67c564f52a0611ba38d04af02636291280b6 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Sun, 27 Oct 2024 14:07:03 -0700 Subject: [PATCH 290/315] Introduce Fractional LMR Tuning Run (90k Games): https://tests.stockfishchess.org/tests/view/67202b1c86d5ee47d953d442 Passed STC: LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 241024 W: 62616 L: 62001 D: 116407
Ptnml(0-2): 716, 28231, 62015, 28822, 728 https://tests.stockfishchess.org/tests/view/6725196786d5ee47d953d9f2 Passed LTC: LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 92532 W: 23678 L: 23246 D: 45608 Ptnml(0-2): 45, 9981, 25797, 10383, 60 https://tests.stockfishchess.org/tests/view/6727d3cb86d5ee47d953db9d closes https://github.com/official-stockfish/Stockfish/pull/5667 Bench: 1066071 --- src/search.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5c6a62c8..c807f1bd 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -999,7 +999,7 @@ moves_loop: // When in check, search starts here mp.skip_quiet_moves(); // Reduced depth of the next LMR search - int lmrDepth = newDepth - r; + int lmrDepth = newDepth - r / 1024; if (capture || givesCheck) { @@ -1156,36 +1156,36 @@ moves_loop: // When in check, search starts here // Decrease reduction if position is or has been on the PV (~7 Elo) if (ss->ttPv) - r -= 1 + (ttData.value > alpha) + (ttData.depth >= depth); + r -= 1024 + (ttData.value > alpha) * 1024 + (ttData.depth >= depth) * 1024; // Decrease reduction for PvNodes (~0 Elo on STC, ~2 Elo on LTC) if (PvNode) - r--; + r -= 1024; // These reduction adjustments have no proven non-linear scaling // Increase reduction for cut nodes (~4 Elo) if (cutNode) - r += 2 - (ttData.depth >= depth && ss->ttPv); + r += 2518 - (ttData.depth >= depth && ss->ttPv) * 991; // Increase reduction if ttMove is a capture but the current move is not a capture (~3 Elo) if (ttCapture && !capture) - r += 1 + (depth < 8); + r += 1043 + (depth < 8) * 999; // Increase reduction if next ply has a lot of fail high (~5 Elo) if ((ss + 1)->cutoffCnt > 3) - r += 1 + allNode; + r += 938 + allNode * 960; // For first picked move (ttMove) reduce reduction (~3 Elo) else if (move == ttData.move) - r -= 2; + r -= 1879; ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] + 
(*contHist[1])[movedPiece][move.to_sq()] - 4410; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) - r -= ss->statScore / 11016; + r -= ss->statScore * 1287 / 16384; // Step 17. Late moves reduction / extension (LMR, ~117 Elo) if (depth >= 2 && moveCount > 1) @@ -1195,7 +1195,7 @@ moves_loop: // When in check, search starts here // beyond the first move depth. // To prevent problems when the max value is less than the min value, // std::clamp has been replaced by a more robust implementation. - Depth d = std::max(1, std::min(newDepth - r, newDepth + !allNode)); + Depth d = std::max(1, std::min(newDepth - r / 1024, newDepth + !allNode)); value = -search(pos, ss + 1, -(alpha + 1), -alpha, d, true); @@ -1223,10 +1223,11 @@ moves_loop: // When in check, search starts here { // Increase reduction if ttMove is not present (~6 Elo) if (!ttData.move) - r += 2; + r += 2037; // Note that if expected reduction is high, we reduce search depth by 1 here (~9 Elo) - value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth - (r > 3), !cutNode); + value = + -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth - (r > 2983), !cutNode); } // For PV nodes only, do a full PV search on the first move or after a fail high, @@ -1700,7 +1701,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) const { int reductionScale = reductions[d] * reductions[mn]; - return (reductionScale + 1239 - delta * 795 / rootDelta) / 1024 + (!i && reductionScale > 1341); + return (reductionScale + 1239 - delta * 795 / rootDelta) + (!i && reductionScale > 1341) * 1135; } // elapsed() returns the time elapsed since the search started. 
If the From 3d084e9164a96bff265b3afb32f2da0aa4e97c47 Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Wed, 13 Nov 2024 20:18:36 +0100 Subject: [PATCH 291/315] VVLTC Search Tune A single tuning run of 190k games was conducted: https://tests.stockfishchess.org/tests/view/670f3e3786d5ee47d953c554. Passed VVLTC 1st sprt: https://tests.stockfishchess.org/tests/view/672344dc86d5ee47d953d8c3 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 56768 W: 14615 L: 14323 D: 27830 Ptnml(0-2): 3, 5152, 17789, 5430, 10 Passed VVLTC 2nd sprt (rebased): https://tests.stockfishchess.org/tests/view/6726d83786d5ee47d953db03 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 57884 W: 14885 L: 14554 D: 28445 Ptnml(0-2): 5, 5300, 17999, 5635, 3 closes https://github.com/official-stockfish/Stockfish/pull/5669 Bench: 920336 --- src/search.cpp | 102 ++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c807f1bd..2a2331cb 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -67,7 +67,7 @@ namespace { // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { - Value futilityMult = 118 - 33 * noTtCutNode; + Value futilityMult = 109 - 27 * noTtCutNode; Value improvingDeduction = improving * futilityMult * 2; Value worseningDeduction = oppWorsening * futilityMult / 3; @@ -94,16 +94,16 @@ Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos, St cntcv = int((*(ss - 2)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()]); const auto cv = - (5932 * pcv + 3269 * macv + 5660 * micv + 6666 * (wnpcv + bnpcv) + 5555 * cntcv) / 131072; + (6384 * pcv + 3583 * macv + 6492 * micv + 6725 * (wnpcv + bnpcv) + cntcv * 5880) / 131072; v += cv; return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { 
return std::min(179 * d - 108, 1598); } +int stat_bonus(Depth d) { return std::min(168 * d - 100, 1718); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return std::min(820 * d - 261, 2246); } +int stat_malus(Depth d) { return std::min(768 * d - 257, 2351); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } @@ -311,13 +311,13 @@ void Search::Worker::iterative_deepening() { selDepth = 0; // Reset aspiration window starting size - delta = 5 + std::abs(rootMoves[pvIdx].meanSquaredScore) / 13797; + delta = 5 + std::abs(rootMoves[pvIdx].meanSquaredScore) / 13461; Value avg = rootMoves[pvIdx].averageScore; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore (~4 Elo) - optimism[us] = 132 * avg / (std::abs(avg) + 89); + optimism[us] = 150 * avg / (std::abs(avg) + 85); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail @@ -502,8 +502,8 @@ void Search::Worker::iterative_deepening() { void Search::Worker::clear() { mainHistory.fill(0); lowPlyHistory.fill(0); - captureHistory.fill(-753); - pawnHistory.fill(-1152); + captureHistory.fill(-758); + pawnHistory.fill(-1158); pawnCorrectionHistory.fill(0); majorPieceCorrectionHistory.fill(0); minorPieceCorrectionHistory.fill(0); @@ -518,10 +518,10 @@ void Search::Worker::clear() { for (StatsType c : {NoCaptures, Captures}) for (auto& to : continuationHistory[inCheck][c]) for (auto& h : to) - h->fill(-678); + h->fill(-645); for (size_t i = 1; i < reductions.size(); ++i) - reductions[i] = int((18.43 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + reductions[i] = int((19.43 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); refreshTable.clear(networks[numaAccessToken]); } @@ -760,7 +760,7 @@ Value Search::Worker::search( // Use static 
evaluation difference to improve quiet move ordering (~9 Elo) if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-10 * int((ss - 1)->staticEval + ss->staticEval), -1641, 1423) + 760; + int bonus = std::clamp(-10 * int((ss - 1)->staticEval + ss->staticEval), -1831, 1428) + 623; thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] @@ -778,7 +778,7 @@ Value Search::Worker::search( // Step 7. Razoring (~1 Elo) // If eval is really low, check with qsearch if we can exceed alpha. If the // search suggests we cannot exceed alpha, return a speculative fail low. - if (eval < alpha - 501 - 272 * depth * depth) + if (eval < alpha - 469 - 307 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) @@ -787,9 +787,9 @@ Value Search::Worker::search( // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. - if (!ss->ttPv && depth < 13 + if (!ss->ttPv && depth < 14 && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - - (ss - 1)->statScore / 272 + - (ss - 1)->statScore / 290 >= beta && eval >= beta && (!ttData.move || ttCapture) && beta > VALUE_TB_LOSS_IN_MAX_PLY && eval < VALUE_TB_WIN_IN_MAX_PLY) @@ -799,13 +799,13 @@ Value Search::Worker::search( // Step 9. 
Null move search with verification search (~35 Elo) if (cutNode && (ss - 1)->currentMove != Move::null() && eval >= beta - && ss->staticEval >= beta - 23 * depth + 400 && !excludedMove && pos.non_pawn_material(us) + && ss->staticEval >= beta - 21 * depth + 421 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / 209, 6) + depth / 3 + 5; + Depth R = std::min(int(eval - beta) / 235, 7) + depth / 3 + 5; ss->currentMove = Move::null(); ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -855,7 +855,7 @@ Value Search::Worker::search( // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search // returns a value much above beta, we can (almost) safely prune the previous move. - probCutBeta = beta + 189 - 53 * improving - 30 * opponentWorsening; + probCutBeta = beta + 187 - 53 * improving - 27 * opponentWorsening; if (!PvNode && depth > 3 && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY // If value from transposition table is lower than probCutBeta, don't attempt @@ -926,7 +926,7 @@ Value Search::Worker::search( moves_loop: // When in check, search starts here // Step 12. 
A small Probcut idea (~4 Elo) - probCutBeta = beta + 379; + probCutBeta = beta + 417; if ((ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 && ttData.value >= probCutBeta && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY) @@ -1010,15 +1010,15 @@ moves_loop: // When in check, search starts here // Futility pruning for captures (~2 Elo) if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { - Value futilityValue = ss->staticEval + 300 + 238 * lmrDepth + Value futilityValue = ss->staticEval + 287 + 253 * lmrDepth + PieceValue[capturedPiece] + captHist / 7; if (futilityValue <= alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - int seeHist = std::clamp(captHist / 32, -159 * depth, 160 * depth); - if (!pos.see_ge(move, -167 * depth - seeHist)) + int seeHist = std::clamp(captHist / 33, -161 * depth, 156 * depth); + if (!pos.see_ge(move, -162 * depth - seeHist)) continue; } else @@ -1029,15 +1029,15 @@ moves_loop: // When in check, search starts here + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; // Continuation history based pruning (~2 Elo) - if (history < -4071 * depth) + if (history < -3884 * depth) continue; history += 2 * thisThread->mainHistory[us][move.from_to()]; - lmrDepth += history / 3653; + lmrDepth += history / 3609; Value futilityValue = - ss->staticEval + (bestValue < ss->staticEval - 51 ? 145 : 49) + 144 * lmrDepth; + ss->staticEval + (bestValue < ss->staticEval - 45 ? 
140 : 43) + 141 * lmrDepth; // Futility pruning: parent node (~13 Elo) if (!ss->inCheck && lmrDepth < 12 && futilityValue <= alpha) @@ -1051,7 +1051,7 @@ moves_loop: // When in check, search starts here lmrDepth = std::max(lmrDepth, 0); // Prune moves with negative SEE (~4 Elo) - if (!pos.see_ge(move, -24 * lmrDepth * lmrDepth)) + if (!pos.see_ge(move, -25 * lmrDepth * lmrDepth)) continue; } } @@ -1074,11 +1074,11 @@ moves_loop: // When in check, search starts here // and lower extension margins scale well. if (!rootNode && move == ttData.move && !excludedMove - && depth >= 4 - (thisThread->completedDepth > 36) + ss->ttPv + && depth >= 4 - (thisThread->completedDepth > 33) + ss->ttPv && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY && (ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 3) { - Value singularBeta = ttData.value - (54 + 77 * (ss->ttPv && !PvNode)) * depth / 64; + Value singularBeta = ttData.value - (56 + 79 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1088,8 +1088,8 @@ moves_loop: // When in check, search starts here if (value < singularBeta) { - int doubleMargin = 262 * PvNode - 204 * !ttCapture; - int tripleMargin = 97 + 266 * PvNode - 255 * !ttCapture + 94 * ss->ttPv; + int doubleMargin = 249 * PvNode - 194 * !ttCapture; + int tripleMargin = 94 + 287 * PvNode - 249 * !ttCapture + 99 * ss->ttPv; extension = 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin); @@ -1127,7 +1127,7 @@ moves_loop: // When in check, search starts here else if (PvNode && move.to_sq() == prevSq && thisThread->captureHistory[movedPiece][move.to_sq()] [type_of(pos.piece_on(move.to_sq()))] - > 4299) + > 4321) extension = 1; } @@ -1182,7 +1182,7 @@ moves_loop: // When in check, search starts here ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - 4410; + + 
(*contHist[1])[movedPiece][move.to_sq()] - 3996; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) r -= ss->statScore * 1287 / 16384; @@ -1204,8 +1204,8 @@ moves_loop: // When in check, search starts here { // Adjust full-depth search based on LMR results - if the result was // good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = value > (bestValue + 38 + 2 * newDepth); // (~1 Elo) - const bool doShallowerSearch = value < bestValue + 8; // (~2 Elo) + const bool doDeeperSearch = value > (bestValue + 42 + 2 * newDepth); // (~1 Elo) + const bool doShallowerSearch = value < bestValue + 10; // (~2 Elo) newDepth += doDeeperSearch - doShallowerSearch; @@ -1377,29 +1377,29 @@ moves_loop: // When in check, search starts here // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (118 * (depth > 5) + 38 * !allNode + 169 * ((ss - 1)->moveCount > 8) - + 116 * (!ss->inCheck && bestValue <= ss->staticEval - 101) - + 133 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 92)); + int bonus = (117 * (depth > 5) + 39 * !allNode + 168 * ((ss - 1)->moveCount > 8) + + 115 * (!ss->inCheck && bestValue <= ss->staticEval - 108) + + 119 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 83)); // Proportional to "how much damage we have to undo" - bonus += std::min(-(ss - 1)->statScore / 102, 305); + bonus += std::min(-(ss - 1)->statScore / 113, 300); bonus = std::max(bonus, 0); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, - stat_bonus(depth) * bonus / 107); + stat_bonus(depth) * bonus / 93); thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] - << stat_bonus(depth) * bonus / 174; + << stat_bonus(depth) * bonus / 179; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] - << 
stat_bonus(depth) * bonus / 25; + << stat_bonus(depth) * bonus / 24; } // Bonus when search fails low and there is a TT move else if (ttData.move && !allNode) - thisThread->mainHistory[us][ttData.move.from_to()] << stat_bonus(depth) / 4; + thisThread->mainHistory[us][ttData.move.from_to()] << stat_bonus(depth) * 23 / 100; if (PvNode) bestValue = std::min(bestValue, maxValue); @@ -1428,13 +1428,13 @@ moves_loop: // When in check, search starts here auto bonus = std::clamp(int(bestValue - ss->staticEval) * depth / 8, -CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4); thisThread->pawnCorrectionHistory[us][pawn_structure_index(pos)] - << bonus * 101 / 128; - thisThread->majorPieceCorrectionHistory[us][major_piece_index(pos)] << bonus * 157 / 128; - thisThread->minorPieceCorrectionHistory[us][minor_piece_index(pos)] << bonus * 153 / 128; + << bonus * 107 / 128; + thisThread->majorPieceCorrectionHistory[us][major_piece_index(pos)] << bonus * 162 / 128; + thisThread->minorPieceCorrectionHistory[us][minor_piece_index(pos)] << bonus * 148 / 128; thisThread->nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)] - << bonus * 123 / 128; + << bonus * 122 / 128; thisThread->nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)] - << bonus * 165 / 128; + << bonus * 185 / 128; if (m.is_ok()) (*(ss - 2)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()] << bonus; @@ -1566,7 +1566,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) if (bestValue > alpha) alpha = bestValue; - futilityBase = ss->staticEval + 280; + futilityBase = ss->staticEval + 306; } const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -1629,11 +1629,11 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) + (*contHist[1])[pos.moved_piece(move)][move.to_sq()] + thisThread->pawnHistory[pawn_structure_index(pos)][pos.moved_piece(move)] [move.to_sq()] - <= 5036) + <= 5095) continue; // Do not search moves 
with bad enough SEE values (~5 Elo) - if (!pos.see_ge(move, -82)) + if (!pos.see_ge(move, -83)) continue; } @@ -1701,7 +1701,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) const { int reductionScale = reductions[d] * reductions[mn]; - return (reductionScale + 1239 - delta * 795 / rootDelta) + (!i && reductionScale > 1341) * 1135; + return (reductionScale + 1304 - delta * 814 / rootDelta) + (!i && reductionScale > 1423) * 1135; } // elapsed() returns the time elapsed since the search started. If the @@ -1832,7 +1832,7 @@ void update_all_stats(const Position& pos, // at ply -1, -2, -3, -4, and -6 with current move. void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - bonus = bonus * 53 / 64; + bonus = bonus * 50 / 64; for (int i : {1, 2, 3, 4, 6}) { From 43e100ae06376d63461005422f26e5517db07c6d Mon Sep 17 00:00:00 2001 From: Nonlinear2 <131959792+Nonlinear2@users.noreply.github.com> Date: Wed, 6 Nov 2024 21:35:59 +0100 Subject: [PATCH 292/315] Use cutnode as TT Cutoff Condition At low enough depths, fail high with TT only when expected cutnode. 
Passed STC: https://tests.stockfishchess.org/tests/view/6726357b86d5ee47d953da8c LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 41184 W: 10873 L: 10551 D: 19760 Ptnml(0-2): 131, 4728, 10554, 5046, 133 Passed LTC: https://tests.stockfishchess.org/tests/view/6727326a86d5ee47d953db30 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 270888 W: 69040 L: 68243 D: 133605 Ptnml(0-2): 180, 29385, 75485, 30246, 148 closes https://github.com/official-stockfish/Stockfish/pull/5670 Bench: 805776 --- src/search.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 2a2331cb..5fdfdeb2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -636,7 +636,8 @@ Value Search::Worker::search( // At non-PV nodes we check for an early TT cutoff if (!PvNode && !excludedMove && ttData.depth > depth - (ttData.value <= beta) && ttData.value != VALUE_NONE // Can happen when !ttHit or when access race in probe() - && (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER))) + && (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER)) + && (cutNode == (ttData.value >= beta) || depth > 8)) { // If ttMove is quiet, update move sorting heuristics on TT hit (~2 Elo) if (ttData.move && ttData.value >= beta) From 070db8b3a1ecfb4753753a3e285578b35acd63cd Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sun, 3 Nov 2024 22:48:42 -0500 Subject: [PATCH 293/315] Update default main net to nn-1c0000000000.nnue Found by updating 489 L2 weights with values found from around 31k / 60k spsa games. 
Spsa was configured to use 60k games, down from 120k games in: https://github.com/official-stockfish/Stockfish/pull/5459 623 spsa params: L2 weights from `nn-1cedc0ffeeee.nnue` where 24 <= |value| <= 30 A: 3000, alpha: 0.602, gamma: 0.101 weights: [-127, 127], c_end = 6 Passed STC: https://tests.stockfishchess.org/tests/view/6728d61e86d5ee47d953dcaf LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 187168 W: 48642 L: 48107 D: 90419 Ptnml(0-2): 558, 21888, 48213, 22311, 614 Passed LTC: https://tests.stockfishchess.org/tests/view/672b018f86d5ee47d953de98 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 235074 W: 59924 L: 59202 D: 115948 Ptnml(0-2): 131, 25467, 65610, 26207, 122 closes https://github.com/official-stockfish/Stockfish/pull/5673 Bench: 898850 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index 9bd436b5..4604321d 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -33,7 +33,7 @@ namespace Eval { // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro or the location where this macro is defined, as it is used // in the Makefile/Fishtest. 
-#define EvalFileDefaultNameBig "nn-1cedc0ffeeee.nnue" +#define EvalFileDefaultNameBig "nn-1c0000000000.nnue" #define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue" namespace NNUE { From ce2d9e27ea8b10abbd69ebd5dd73e7dcf0aa0655 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sun, 10 Nov 2024 18:52:29 +0300 Subject: [PATCH 294/315] Simplify big-net reevaluation Passed STC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 37408 W: 9699 L: 9477 D: 18232 Ptnml(0-2): 130, 4326, 9577, 4534, 137 https://tests.stockfishchess.org/tests/view/672ffd8086d5ee47d953e633 Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 151062 W: 38087 L: 37999 D: 74976 Ptnml(0-2): 63, 16686, 41958, 16748, 76 https://tests.stockfishchess.org/tests/view/673087aa86d5ee47d953e66b closes https://github.com/official-stockfish/Stockfish/pull/5674 Bench: 848812 --- src/evaluate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 7c7b54a4..bc86a742 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -66,7 +66,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, Value nnue = (125 * psqt + 131 * positional) / 128; // Re-evaluate the position when higher eval accuracy is worth the time spent - if (smallNet && (nnue * psqt < 0 || std::abs(nnue) < 227)) + if (smallNet && (std::abs(nnue) < 236)) { std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big); nnue = (125 * psqt + 131 * positional) / 128; From 49138b8c33ca7bacff710efba4a90630a3490c08 Mon Sep 17 00:00:00 2001 From: Disservin Date: Wed, 13 Nov 2024 14:56:19 +0100 Subject: [PATCH 295/315] Fix CI Docker Buildx closes https://github.com/official-stockfish/Stockfish/pull/5678 No functional change --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a826e6f0..b97aaa29 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -139,7 +139,7 @@ 
jobs: - name: Build Docker container if: matrix.config.base_image run: | - docker buildx build --load -t sf_builder - << EOF + docker buildx build --platform ${{ matrix.config.platform }} --load -t sf_builder - << EOF FROM ${{ matrix.config.base_image }} WORKDIR /app RUN apk update && apk add make g++ From 82b092ca48c2efeadf2108a8351bb1309c4b7780 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Wed, 13 Nov 2024 20:43:04 +0300 Subject: [PATCH 296/315] Adjust statscore for captures Instead of using quiet histories use capture history with a different offset. Passed STC: https://tests.stockfishchess.org/tests/view/6731d5cc86d5ee47d953e719 LLR: 2.96 (-2.94,2.94) <0.00,2.00> Total: 428896 W: 111160 L: 110269 D: 207467 Ptnml(0-2): 1220, 50296, 110534, 51169, 1229 Passed LTC: https://tests.stockfishchess.org/tests/view/6733d9fd86d5ee47d953e962 LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 105882 W: 26918 L: 26458 D: 52506 Ptnml(0-2): 66, 11430, 29482, 11904, 59 closes https://github.com/official-stockfish/Stockfish/pull/5679 Bench: 840721 --- src/search.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5fdfdeb2..94b20c85 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1181,9 +1181,14 @@ moves_loop: // When in check, search starts here else if (move == ttData.move) r -= 1879; - ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] - + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - 3996; + if (capture) + ss->statScore = + thisThread->captureHistory[movedPiece][move.to_sq()][type_of(pos.captured_piece())] + - 13000; + else + ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + + (*contHist[0])[movedPiece][move.to_sq()] + + (*contHist[1])[movedPiece][move.to_sq()] - 3996; // Decrease/increase reduction for moves with a good/bad history (~8 Elo) r -= ss->statScore * 1287 / 16384; From f129bf0de94f2c5a7ee19e697612a7e83ccd28ff Mon Sep 17 
00:00:00 2001 From: Michael Chaly Date: Sun, 17 Nov 2024 09:19:06 +0300 Subject: [PATCH 297/315] Tweak statscore for captures Followup of a recent patch that separated statscore for captures and non-captures. Lower value that we subtract from statscore if a move is a capture. Passed STC: https://tests.stockfishchess.org/tests/view/67385b6786d5ee47d953eeba LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 50592 W: 13223 L: 12888 D: 24481 Ptnml(0-2): 154, 5853, 12931, 6220, 138 Passed LTC: https://tests.stockfishchess.org/tests/view/6739056e86d5ee47d953ef3f LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 23598 W: 6155 L: 5862 D: 11581 Ptnml(0-2): 16, 2466, 6543, 2757, 17 closes https://github.com/official-stockfish/Stockfish/pull/5682 Bench: 771180 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 94b20c85..50b31d2a 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1184,7 +1184,7 @@ moves_loop: // When in check, search starts here if (capture) ss->statScore = thisThread->captureHistory[movedPiece][move.to_sq()][type_of(pos.captured_piece())] - - 13000; + - 11000; else ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] From 0282edc0b06017b5f03971510cdb23e105fe9851 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Wed, 20 Nov 2024 01:09:39 +0300 Subject: [PATCH 298/315] Simplify bonus formula Give full bonus instead of half. 
Passed STC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 23872 W: 6254 L: 6018 D: 11600 Ptnml(0-2): 80, 2691, 6152, 2939, 74 https://tests.stockfishchess.org/tests/view/673b709686d5ee47d953f19d Passed LTC: LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 42894 W: 10924 L: 10725 D: 21245 Ptnml(0-2): 30, 4592, 12011, 4777, 37 https://tests.stockfishchess.org/tests/view/673bb50386d5ee47d953f1eb closes https://github.com/official-stockfish/Stockfish/pull/5683 Bench: 836558 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 50b31d2a..f1942a4f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -765,7 +765,7 @@ Value Search::Worker::search( thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] - << bonus / 2; + << bonus; } // Set up the improving flag, which is true if current static evaluation is From d29c8bd5d456b2a6fcee2069e1440ef82cba2b1e Mon Sep 17 00:00:00 2001 From: Guenther Demetz Date: Wed, 20 Nov 2024 14:48:23 +0100 Subject: [PATCH 299/315] Rewrite of 'Adjust correction history' condition Current condition is convoluted and hard to understand because of several negations. Also added 2 comments to make the concept behind the condition easier to understand.
closes https://github.com/official-stockfish/Stockfish/pull/5685 No functional change --- src/search.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index f1942a4f..93036398 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1425,9 +1425,9 @@ moves_loop: // When in check, search starts here depth, bestMove, unadjustedStaticEval, tt.generation()); // Adjust correction history - if (!ss->inCheck && (!bestMove || !pos.capture(bestMove)) - && !(bestValue >= beta && bestValue <= ss->staticEval) - && !(!bestMove && bestValue >= ss->staticEval)) + if (!ss->inCheck && !(bestMove && pos.capture(bestMove)) + && ((bestValue < ss->staticEval && bestValue < beta) // negative correction & no fail high + || (bestValue > ss->staticEval && bestMove))) // positive correction & no fail low { const auto m = (ss - 1)->currentMove; From cd3c13a883b2d1e8dc32400202f8e0bae7d8123a Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Wed, 20 Nov 2024 17:03:56 +0300 Subject: [PATCH 300/315] Further tweak statscore for captures Even lower offset. 
Passed STC: https://tests.stockfishchess.org/tests/view/673a66d786d5ee47d953f070 LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 63776 W: 16570 L: 16216 D: 30990 Ptnml(0-2): 178, 7371, 16478, 7641, 220 Passed LTC: https://tests.stockfishchess.org/tests/view/673b2e2a86d5ee47d953f14b LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 156960 W: 39999 L: 39435 D: 77526 Ptnml(0-2): 96, 16965, 43803, 17511, 105 closes https://github.com/official-stockfish/Stockfish/pull/5686 Bench: 867931 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 93036398..213bbdab 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1184,7 +1184,7 @@ moves_loop: // When in check, search starts here if (capture) ss->statScore = thisThread->captureHistory[movedPiece][move.to_sq()][type_of(pos.captured_piece())] - - 11000; + - 5454; else ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] From 4fcd78ceb4a5cf25ee652ee7793bb0a3fa1f95df Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Mon, 18 Nov 2024 09:08:26 -0800 Subject: [PATCH 301/315] Simplify Probcut Bonus Passed STC: LLR: 2.99 (-2.94,2.94) <-1.75,0.25> Total: 172288 W: 44656 L: 44580 D: 83052 Ptnml(0-2): 507, 20650, 43782, 20670, 535 https://tests.stockfishchess.org/tests/view/673b74f986d5ee47d953f1a3 Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 94596 W: 24098 L: 23953 D: 46545 Ptnml(0-2): 57, 10322, 26393, 10471, 55 https://tests.stockfishchess.org/tests/view/673d191886d5ee47d953f337 closes https://github.com/official-stockfish/Stockfish/pull/5688 Bench: 1031022 --- src/search.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 213bbdab..ace6385d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -910,8 +910,7 @@ Value Search::Worker::search( if (value >= probCutBeta) { - thisThread->captureHistory[movedPiece][move.to_sq()][type_of(captured)] - << stat_bonus(depth - 
2); + thisThread->captureHistory[movedPiece][move.to_sq()][type_of(captured)] << 1300; // Save ProbCut data into transposition table ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, From fb6be17ad40d321b5fff02395bc156568fce3091 Mon Sep 17 00:00:00 2001 From: Daniel Monroe Date: Sun, 17 Nov 2024 20:46:30 -0800 Subject: [PATCH 302/315] Simplify statscore at captures Simplify statscores for captures, setting them to 0 A recent tweak of Vizvezdenec finds substantial elo gain from giving captures a separate statscore, which is used mainly for reductions. The idea is that the old combination of quiet histories was inappropriate and that a value based on the capture history is more suitable. This simplification sets the statscore for captures to 0, suggesting that the elo gain came from rectifying the quiet history/capture mismatch. Passed STC (against a slightly older version of Viz's patch) https://tests.stockfishchess.org/tests/view/673ac6e286d5ee47d953f0ec LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 57312 W: 14872 L: 14672 D: 27768 Ptnml(0-2): 152, 6761, 14649, 6923, 171 Passed LTC (against Viz's newest patch) https://tests.stockfishchess.org/tests/view/673cd00686d5ee47d953f2db LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 88236 W: 22510 L: 22358 D: 43368 Ptnml(0-2): 70, 9530, 24745, 9724, 49 closes https://github.com/official-stockfish/Stockfish/pull/5691 Bench: 959947 --- src/search.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index ace6385d..560b031b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -538,7 +538,7 @@ Value Search::Worker::search( // Dive into quiescence search when the depth reaches zero if (depth <= 0) - return qsearch < PvNode ? 
PV : NonPV > (pos, ss, alpha, beta); + return qsearch(pos, ss, alpha, beta); // Limit the depth if extensions made it too large depth = std::min(depth, MAX_PLY - 1); @@ -1181,9 +1181,7 @@ moves_loop: // When in check, search starts here r -= 1879; if (capture) - ss->statScore = - thisThread->captureHistory[movedPiece][move.to_sq()][type_of(pos.captured_piece())] - - 5454; + ss->statScore = 0; else ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] From b7f17346e55a9494d8fed610f613e1722da3042d Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Thu, 21 Nov 2024 22:17:47 -0800 Subject: [PATCH 303/315] Fix Sanitizer Tests closes https://github.com/official-stockfish/Stockfish/pull/5692 No functional change --- tests/instrumented.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/instrumented.py b/tests/instrumented.py index a3747d4e..db5ec8e0 100644 --- a/tests/instrumented.py +++ b/tests/instrumented.py @@ -352,10 +352,10 @@ class TestInteractive(metaclass=OrderedClassMembers): def test_fen_position_depth_27(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( - "position fen 1NR2B2/5p2/5p2/1p1kpp2/1P2rp2/2P1pB2/2P1P1K1/8 b - -" + "position fen r1b2r1k/pp1p2pp/2p5/2B1q3/8/8/P1PN2PP/R4RK1 w - - 0 18" ) - self.stockfish.send_command("go depth 27") - self.stockfish.contains("score mate -2") + self.stockfish.send_command("go") + self.stockfish.contains("score mate 1") self.stockfish.starts_with("bestmove") From 55905e562a0de4aeef9c5081a9eab80e6ed4c542 Mon Sep 17 00:00:00 2001 From: Daniel Monroe Date: Wed, 13 Nov 2024 12:56:29 -0800 Subject: [PATCH 304/315] Simplify movepick coefficients This commit sets movepick weights for all continuation histories to 1 and doubles the weight for the main history, inspired by a recent tune. 
Passed STC https://tests.stockfishchess.org/tests/view/6735151a86d5ee47d953eaa2 LLR: 2.92 (-2.94,2.94) <-1.75,0.25> Total: 29984 W: 7840 L: 7612 D: 14532 Ptnml(0-2): 85, 3511, 7571, 3741, 84 Passed LTC https://tests.stockfishchess.org/tests/view/673667a986d5ee47d953ec78 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 26268 W: 6726 L: 6510 D: 13032 Ptnml(0-2): 16, 2797, 7288, 3021, 12 closes https://github.com/official-stockfish/Stockfish/pull/5680 Bench: 1130293 --- src/movepick.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 720f2e03..df722ece 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -158,11 +158,11 @@ void MovePicker::score() { Square to = m.to_sq(); // histories - m.value = (*mainHistory)[pos.side_to_move()][m.from_to()]; + m.value = 2 * (*mainHistory)[pos.side_to_move()][m.from_to()]; m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to]; - m.value += 2 * (*continuationHistory[0])[pc][to]; + m.value += (*continuationHistory[0])[pc][to]; m.value += (*continuationHistory[1])[pc][to]; - m.value += (*continuationHistory[2])[pc][to] / 3; + m.value += (*continuationHistory[2])[pc][to]; m.value += (*continuationHistory[3])[pc][to]; m.value += (*continuationHistory[5])[pc][to]; From 70bb317afe870c8bc1979ef955f120e4d81f504e Mon Sep 17 00:00:00 2001 From: Daniel Monroe Date: Fri, 22 Nov 2024 16:56:50 -0800 Subject: [PATCH 305/315] Bonus for a prior capture that causes a fail low. This tweak adds a bonus equal to twice the stat_bonus for the current depth for a prior capture that caused a fail high, similar to the prior countermove bonus we currently have. 
Passed STC https://tests.stockfishchess.org/tests/view/673bc14b86d5ee47d953f1f2 LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 105824 W: 27538 L: 27118 D: 51168 Ptnml(0-2): 358, 12370, 27024, 12814, 346 Passed LTC https://tests.stockfishchess.org/tests/view/673ccbff86d5ee47d953f2d9 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 134502 W: 34340 L: 33820 D: 66342 Ptnml(0-2): 102, 14634, 37229, 15214, 72 closes https://github.com/official-stockfish/Stockfish/pull/5695 Bench: 1107054 --- src/search.cpp | 19 +++++++++++++++---- src/search.h | 3 ++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 560b031b..45f0f10f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -538,7 +538,7 @@ Value Search::Worker::search( // Dive into quiescence search when the depth reaches zero if (depth <= 0) - return qsearch(pos, ss, alpha, beta); + return qsearch < PvNode ? PV : NonPV > (pos, ss, alpha, beta); // Limit the depth if extensions made it too large depth = std::min(depth, MAX_PLY - 1); @@ -889,7 +889,8 @@ Value Search::Worker::search( // Prefetch the TT entry for the resulting position prefetch(tt.first_entry(pos.key_after(move))); - ss->currentMove = move; + ss->currentMove = move; + ss->capturedPiece = captured; ss->continuationHistory = &this->continuationHistory[ss->inCheck][true][pos.moved_piece(move)][move.to_sq()]; ss->continuationCorrectionHistory = @@ -1138,7 +1139,8 @@ moves_loop: // When in check, search starts here prefetch(tt.first_entry(pos.key_after(move))); // Update the current move (this must be done after singular extension search) - ss->currentMove = move; + ss->currentMove = move; + ss->capturedPiece = pos.piece_on(move.to_sq()); ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck][capture][movedPiece][move.to_sq()]; ss->continuationCorrectionHistory = @@ -1400,6 +1402,14 @@ moves_loop: // When in check, search starts here << stat_bonus(depth) * bonus / 24; } + else if (priorCapture && prevSq != 
SQ_NONE) + { + // bonus for prior countermoves that caused the fail low + Piece capturedPiece = (ss - 1)->capturedPiece; + thisThread->captureHistory[pos.piece_on(prevSq)][prevSq][type_of(capturedPiece)] + << stat_bonus(depth) * 2; + } + // Bonus when search fails low and there is a TT move else if (ttData.move && !allNode) thisThread->mainHistory[us][ttData.move.from_to()] << stat_bonus(depth) * 23 / 100; @@ -1644,7 +1654,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) prefetch(tt.first_entry(pos.key_after(move))); // Update the current move - ss->currentMove = move; + ss->currentMove = move; + ss->capturedPiece = pos.piece_on(move.to_sq()); ss->continuationHistory = &thisThread ->continuationHistory[ss->inCheck][capture][pos.moved_piece(move)][move.to_sq()]; diff --git a/src/search.h b/src/search.h index b618855b..7868f607 100644 --- a/src/search.h +++ b/src/search.h @@ -66,6 +66,7 @@ struct Stack { CorrectionHistory* continuationCorrectionHistory; int ply; Move currentMove; + Piece capturedPiece; Move excludedMove; Value staticEval; int statScore; @@ -356,4 +357,4 @@ class Worker { } // namespace Stockfish -#endif // #ifndef SEARCH_H_INCLUDED +#endif // #ifndef SEARCH_H_INCLUDED \ No newline at end of file From 57e06be71f0177a69843750a9f456462d02f23b9 Mon Sep 17 00:00:00 2001 From: Nonlinear2 <131959792+Nonlinear2@users.noreply.github.com> Date: Fri, 22 Nov 2024 11:24:43 +0100 Subject: [PATCH 306/315] Add functions to check for decisive scores Thanks to peregrineshahin and robbyrobbyrob for their suggestions. 
closes https://github.com/official-stockfish/Stockfish/pull/5696 No functional change --- src/nnue/nnue_misc.cpp | 2 +- src/score.cpp | 2 +- src/search.cpp | 80 +++++++++++++++++++----------------------- src/thread.cpp | 10 +++--- src/types.h | 15 ++++++++ 5 files changed, 58 insertions(+), 51 deletions(-) diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp index 122610a7..a2bece21 100644 --- a/src/nnue/nnue_misc.cpp +++ b/src/nnue/nnue_misc.cpp @@ -126,7 +126,7 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+'; if (pc != NO_PIECE) board[y + 1][x + 4] = PieceToChar[pc]; - if (value != VALUE_NONE) + if (is_valid(value)) format_cp_compact(value, &board[y + 2][x + 2], pos); }; diff --git a/src/score.cpp b/src/score.cpp index 292f5340..179796d2 100644 --- a/src/score.cpp +++ b/src/score.cpp @@ -29,7 +29,7 @@ namespace Stockfish { Score::Score(Value v, const Position& pos) { assert(-VALUE_INFINITE < v && v < VALUE_INFINITE); - if (std::abs(v) < VALUE_TB_WIN_IN_MAX_PLY) + if (!is_decisive(v)) { score = InternalUnits{UCIEngine::to_cp(v, pos)}; } diff --git a/src/search.cpp b/src/search.cpp index 45f0f10f..2e904f40 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -388,7 +388,7 @@ void Search::Worker::iterative_deepening() { // if we would have had time to fully search other root-moves. Thus // we suppress this output and below pick a proven score/PV for this // thread (from the previous iteration). - && !(threads.abortedSearch && rootMoves[0].uciScore <= VALUE_TB_LOSS_IN_MAX_PLY)) + && !(threads.abortedSearch && is_loss(rootMoves[0].uciScore))) main_manager()->pv(*this, threads, tt, rootDepth); if (threads.stop) @@ -401,7 +401,7 @@ void Search::Worker::iterative_deepening() { // We make sure not to pick an unproven mated-in score, // in case this thread prematurely stopped search (aborted-search). 
if (threads.abortedSearch && rootMoves[0].score != -VALUE_INFINITE - && rootMoves[0].score <= VALUE_TB_LOSS_IN_MAX_PLY) + && is_loss(rootMoves[0].score)) { // Bring the last best move to the front for best thread selection. Utility::move_to_front(rootMoves, [&lastBestPV = std::as_const(lastBestPV)]( @@ -635,7 +635,7 @@ Value Search::Worker::search( // At non-PV nodes we check for an early TT cutoff if (!PvNode && !excludedMove && ttData.depth > depth - (ttData.value <= beta) - && ttData.value != VALUE_NONE // Can happen when !ttHit or when access race in probe() + && is_valid(ttData.value) // Can happen when !ttHit or when access race in probe() && (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER)) && (cutNode == (ttData.value >= beta) || depth > 8)) { @@ -732,7 +732,7 @@ Value Search::Worker::search( { // Never assume anything about values stored in TT unadjustedStaticEval = ttData.eval; - if (unadjustedStaticEval == VALUE_NONE) + if (!is_valid(unadjustedStaticEval)) unadjustedStaticEval = evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]); else if (PvNode) @@ -742,7 +742,7 @@ Value Search::Worker::search( to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos, ss); // ttValue can be used as a better position evaluation (~7 Elo) - if (ttData.value != VALUE_NONE + if (is_valid(ttData.value) && (ttData.bound & (ttData.value > eval ? 
BOUND_LOWER : BOUND_UPPER))) eval = ttData.value; } @@ -782,7 +782,7 @@ Value Search::Worker::search( if (eval < alpha - 469 - 307 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); - if (value < alpha && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) + if (value < alpha && !is_decisive(value)) return value; } @@ -792,8 +792,7 @@ Value Search::Worker::search( && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) - (ss - 1)->statScore / 290 >= beta - && eval >= beta && (!ttData.move || ttCapture) && beta > VALUE_TB_LOSS_IN_MAX_PLY - && eval < VALUE_TB_WIN_IN_MAX_PLY) + && eval >= beta && (!ttData.move || ttCapture) && !is_loss(beta) && !is_win(eval)) return beta + (eval - beta) / 3; improving |= ss->staticEval >= beta + 100; @@ -801,7 +800,7 @@ Value Search::Worker::search( // Step 9. Null move search with verification search (~35 Elo) if (cutNode && (ss - 1)->currentMove != Move::null() && eval >= beta && ss->staticEval >= beta - 21 * depth + 421 && !excludedMove && pos.non_pawn_material(us) - && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) + && ss->ply >= thisThread->nmpMinPly && !is_loss(beta)) { assert(eval - beta >= 0); @@ -819,7 +818,7 @@ Value Search::Worker::search( pos.undo_null_move(); // Do not return unproven mate or TB scores - if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY) + if (nullValue >= beta && !is_win(nullValue)) { if (thisThread->nmpMinPly || depth < 16) return nullValue; @@ -858,12 +857,12 @@ Value Search::Worker::search( // returns a value much above beta, we can (almost) safely prune the previous move. 
probCutBeta = beta + 187 - 53 * improving - 27 * opponentWorsening; if (!PvNode && depth > 3 - && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY + && !is_decisive(beta) // If value from transposition table is lower than probCutBeta, don't attempt // probCut there and in further interactions with transposition table cutoff // depth is set to depth - 3 because probCut search has depth set to depth - 4 // but we also do a move before it. So effective depth is equal to depth - 3. - && !(ttData.depth >= depth - 3 && ttData.value != VALUE_NONE && ttData.value < probCutBeta)) + && !(ttData.depth >= depth - 3 && is_valid(ttData.value) && ttData.value < probCutBeta)) { assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta); @@ -916,8 +915,7 @@ Value Search::Worker::search( // Save ProbCut data into transposition table ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, depth - 3, move, unadjustedStaticEval, tt.generation()); - return std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY ? value - (probCutBeta - beta) - : value; + return is_decisive(value) ? value : value - (probCutBeta - beta); } } @@ -929,8 +927,7 @@ moves_loop: // When in check, search starts here // Step 12. A small Probcut idea (~4 Elo) probCutBeta = beta + 417; if ((ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 && ttData.value >= probCutBeta - && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY - && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY) + && !is_decisive(beta) && is_valid(ttData.value) && !is_decisive(ttData.value)) return probCutBeta; const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -993,7 +990,7 @@ moves_loop: // When in check, search starts here // Step 14. Pruning at shallow depth (~120 Elo). // Depth conditions are important for mate finding. 
- if (!rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) + if (!rootNode && pos.non_pawn_material(us) && !is_loss(bestValue)) { // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) if (moveCount >= futility_move_count(improving, depth)) @@ -1043,8 +1040,8 @@ moves_loop: // When in check, search starts here // Futility pruning: parent node (~13 Elo) if (!ss->inCheck && lmrDepth < 12 && futilityValue <= alpha) { - if (bestValue <= futilityValue && std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY - && futilityValue < VALUE_TB_WIN_IN_MAX_PLY) + if (bestValue <= futilityValue && !is_decisive(bestValue) + && !is_win(futilityValue)) bestValue = futilityValue; continue; } @@ -1076,8 +1073,8 @@ moves_loop: // When in check, search starts here if (!rootNode && move == ttData.move && !excludedMove && depth >= 4 - (thisThread->completedDepth > 33) + ss->ttPv - && std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY && (ttData.bound & BOUND_LOWER) - && ttData.depth >= depth - 3) + && is_valid(ttData.value) && !is_decisive(ttData.value) + && (ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 3) { Value singularBeta = ttData.value - (56 + 79 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; @@ -1104,7 +1101,7 @@ moves_loop: // When in check, search starts here // over the original beta, we assume this expected cut-node is not // singular (multiple moves fail high), and we can prune the whole // subtree by returning a softbound. - else if (value >= beta && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) + else if (value >= beta && !is_decisive(value)) return value; // Negative extensions @@ -1315,9 +1312,8 @@ moves_loop: // When in check, search starts here // In case we have an alternative move equal in eval to the current bestmove, // promote it to bestmove by pretending it just exceeds alpha (but not beta). 
- int inc = - (value == bestValue && (int(nodes) & 15) == 0 && ss->ply + 2 >= thisThread->rootDepth - && std::abs(value) + 1 < VALUE_TB_WIN_IN_MAX_PLY); + int inc = (value == bestValue && ss->ply + 2 >= thisThread->rootDepth + && (int(nodes) & 15) == 0 && !is_win(std::abs(value) + 1)); if (value + inc > bestValue) { @@ -1339,7 +1335,7 @@ moves_loop: // When in check, search starts here else { // Reduce other moves if we have found at least one score improvement (~2 Elo) - if (depth > 2 && depth < 14 && std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY) + if (depth > 2 && depth < 14 && !is_decisive(value)) depth -= 2; assert(depth > 0); @@ -1367,8 +1363,8 @@ moves_loop: // When in check, search starts here assert(moveCount || !ss->inCheck || excludedMove || !MoveList(pos).size()); // Adjust best value for fail high cases at non-pv nodes - if (!PvNode && bestValue >= beta && std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY - && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(alpha) < VALUE_TB_WIN_IN_MAX_PLY) + if (!PvNode && bestValue >= beta && !is_decisive(bestValue) && !is_decisive(beta) + && !is_decisive(alpha)) bestValue = (bestValue * depth + beta) / (depth + 1); if (!moveCount) @@ -1528,7 +1524,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) // At non-PV nodes we check for an early TT cutoff if (!PvNode && ttData.depth >= DEPTH_QS - && ttData.value != VALUE_NONE // Can happen when !ttHit or when access race in probe() + && is_valid(ttData.value) // Can happen when !ttHit or when access race in probe() && (ttData.bound & (ttData.value >= beta ? 
BOUND_LOWER : BOUND_UPPER))) return ttData.value; @@ -1542,14 +1538,14 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) { // Never assume anything about values stored in TT unadjustedStaticEval = ttData.eval; - if (unadjustedStaticEval == VALUE_NONE) + if (!is_valid(unadjustedStaticEval)) unadjustedStaticEval = evaluate(networks[numaAccessToken], pos, refreshTable, thisThread->optimism[us]); ss->staticEval = bestValue = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos, ss); // ttValue can be used as a better position evaluation (~13 Elo) - if (std::abs(ttData.value) < VALUE_TB_WIN_IN_MAX_PLY + if (is_valid(ttData.value) && !is_decisive(ttData.value) && (ttData.bound & (ttData.value > bestValue ? BOUND_LOWER : BOUND_UPPER))) bestValue = ttData.value; } @@ -1567,7 +1563,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) { - if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY) + if (!is_decisive(bestValue)) bestValue = (bestValue + beta) / 2; if (!ss->ttHit) ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, @@ -1608,10 +1604,10 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) moveCount++; // Step 6. 
Pruning - if (bestValue > VALUE_TB_LOSS_IN_MAX_PLY && pos.non_pawn_material(us)) + if (!is_loss(bestValue) && pos.non_pawn_material(us)) { // Futility pruning and moveCount pruning (~10 Elo) - if (!givesCheck && move.to_sq() != prevSq && futilityBase > VALUE_TB_LOSS_IN_MAX_PLY + if (!givesCheck && move.to_sq() != prevSq && !is_loss(futilityBase) && move.type_of() != PROMOTION) { if (moveCount > 2) @@ -1699,7 +1695,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) return mated_in(ss->ply); // Plies to mate from the root } - if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && bestValue >= beta) + if (!is_decisive(bestValue) && bestValue >= beta) bestValue = (3 * bestValue + beta) / 4; // Save gathered info in transposition table. The static evaluation @@ -1737,11 +1733,7 @@ namespace { // Adjusts a mate or TB score from "plies to mate from the root" to // "plies to mate from the current position". Standard scores are unchanged. // The function is called before storing a value in the transposition table. -Value value_to_tt(Value v, int ply) { - - assert(v != VALUE_NONE); - return v >= VALUE_TB_WIN_IN_MAX_PLY ? v + ply : v <= VALUE_TB_LOSS_IN_MAX_PLY ? v - ply : v; -} +Value value_to_tt(Value v, int ply) { return is_win(v) ? v + ply : is_loss(v) ? v - ply : v; } // Inverse of value_to_tt(): it adjusts a mate or TB score from the transposition @@ -1751,11 +1743,11 @@ Value value_to_tt(Value v, int ply) { // graph history interaction, we return the highest non-TB score instead. 
Value value_from_tt(Value v, int ply, int r50c) { - if (v == VALUE_NONE) + if (!is_valid(v)) return VALUE_NONE; // handle TB win or better - if (v >= VALUE_TB_WIN_IN_MAX_PLY) + if (is_win(v)) { // Downgrade a potentially false mate score if (v >= VALUE_MATE_IN_MAX_PLY && VALUE_MATE - v > 100 - r50c) @@ -1769,7 +1761,7 @@ Value value_from_tt(Value v, int ply, int r50c) { } // handle TB loss or worse - if (v <= VALUE_TB_LOSS_IN_MAX_PLY) + if (is_loss(v)) { // Downgrade a potentially false mate score. if (v <= VALUE_MATED_IN_MAX_PLY && VALUE_MATE + v > 100 - r50c) @@ -2108,7 +2100,7 @@ void SearchManager::pv(Search::Worker& worker, bool isExact = i != pvIdx || tb || !updated; // tablebase- and previous-scores are exact // Potentially correct and extend the PV, and in exceptional cases v - if (std::abs(v) >= VALUE_TB_WIN_IN_MAX_PLY && std::abs(v) < VALUE_MATE_IN_MAX_PLY + if (is_decisive(v) && std::abs(v) < VALUE_MATE_IN_MAX_PLY && ((!rootMoves[i].scoreLowerbound && !rootMoves[i].scoreUpperbound) || isExact)) syzygy_extend_pv(worker.options, worker.limits, pos, rootMoves[i], v); diff --git a/src/thread.cpp b/src/thread.cpp index b5d51594..5f73771e 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -329,13 +329,13 @@ Thread* ThreadPool::get_best_thread() const { const auto bestThreadMoveVote = votes[bestThreadPV[0]]; const auto newThreadMoveVote = votes[newThreadPV[0]]; - const bool bestThreadInProvenWin = bestThreadScore >= VALUE_TB_WIN_IN_MAX_PLY; - const bool newThreadInProvenWin = newThreadScore >= VALUE_TB_WIN_IN_MAX_PLY; + const bool bestThreadInProvenWin = is_win(bestThreadScore); + const bool newThreadInProvenWin = is_win(newThreadScore); const bool bestThreadInProvenLoss = - bestThreadScore != -VALUE_INFINITE && bestThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY; + bestThreadScore != -VALUE_INFINITE && is_loss(bestThreadScore); const bool newThreadInProvenLoss = - newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY; + newThreadScore != 
-VALUE_INFINITE && is_loss(newThreadScore); // We make sure not to pick a thread with truncated principal variation const bool betterVotingValue = @@ -355,7 +355,7 @@ Thread* ThreadPool::get_best_thread() const { bestThread = th.get(); } else if (newThreadInProvenWin || newThreadInProvenLoss - || (newThreadScore > VALUE_TB_LOSS_IN_MAX_PLY + || (!is_loss(newThreadScore) && (newThreadMoveVote > bestThreadMoveVote || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue)))) bestThread = th.get(); diff --git a/src/types.h b/src/types.h index b12491d6..56444601 100644 --- a/src/types.h +++ b/src/types.h @@ -155,6 +155,21 @@ constexpr Value VALUE_TB = VALUE_MATE_IN_MAX_PLY - 1; constexpr Value VALUE_TB_WIN_IN_MAX_PLY = VALUE_TB - MAX_PLY; constexpr Value VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY; + +constexpr bool is_valid(Value value) { return value != VALUE_NONE; } + +constexpr bool is_win(Value value) { + assert(is_valid(value)); + return value >= VALUE_TB_WIN_IN_MAX_PLY; +} + +constexpr bool is_loss(Value value) { + assert(is_valid(value)); + return value <= VALUE_TB_LOSS_IN_MAX_PLY; +} + +constexpr bool is_decisive(Value value) { return is_win(value) || is_loss(value); } + // In the code, we make the assumption that these values // are such that non_pawn_material() can be used to uniquely // identify the material on the board. From da82942b541b2a6512189a9bad2284c6f45f3c44 Mon Sep 17 00:00:00 2001 From: Nonlinear2 <131959792+Nonlinear2@users.noreply.github.com> Date: Fri, 22 Nov 2024 11:24:43 +0100 Subject: [PATCH 307/315] Add functions to check for decisive scores Thanks to peregrineshahin and robbyrobbyrob for their suggestions. 
closes https://github.com/official-stockfish/Stockfish/pull/5696 No functional change --- src/search.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 2e904f40..ea43017e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1042,7 +1042,9 @@ moves_loop: // When in check, search starts here { if (bestValue <= futilityValue && !is_decisive(bestValue) && !is_win(futilityValue)) - bestValue = futilityValue; + if (bestValue <= futilityValue && !is_decisive(bestValue) + && !is_win(futilityValue)) + bestValue = futilityValue; continue; } From d5a36a3c92533782d6a74d16c080de0c1538f65d Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sat, 23 Nov 2024 14:37:08 +0300 Subject: [PATCH 308/315] Simplify probCutBeta formula After recent changes to the improving definition, seems like there is no need anymore to keep opponentWorsening in the probCutBeta formula. Passed STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 214272 W: 55566 L: 55541 D: 103165 Ptnml(0-2): 620, 25540, 54817, 25513, 646 https://tests.stockfishchess.org/tests/view/6735243d86d5ee47d953eaea Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 126708 W: 32329 L: 32216 D: 62163 Ptnml(0-2): 68, 13986, 35123, 14119, 58 https://tests.stockfishchess.org/tests/view/67393cf686d5ee47d953ef99 closes https://github.com/official-stockfish/Stockfish/pull/5697 Bench: 983067 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index ea43017e..5209bd07 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -855,7 +855,7 @@ Value Search::Worker::search( // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search // returns a value much above beta, we can (almost) safely prune the previous move. 
- probCutBeta = beta + 187 - 53 * improving - 27 * opponentWorsening; + probCutBeta = beta + 187 - 56 * improving; if (!PvNode && depth > 3 && !is_decisive(beta) // If value from transposition table is lower than probCutBeta, don't attempt From 713000c517c63e6926bdbe1071e647280bc3da32 Mon Sep 17 00:00:00 2001 From: pb00067 Date: Sun, 24 Nov 2024 16:05:04 +0100 Subject: [PATCH 309/315] Same weight for black and white nonPawnCorrection history Since we don't have color dependent parameters in NNUE eval, it also makes no sense IMO to have color dependent parameters in correction histories. Ideally a fixed depth search on a single thread should be deterministic, delivering the same result (move) if we just flip colors on the board. Patch replaces 2 parameters (122 and 185) with just one value 154 (= the avg of the two). Passed STC-non regression https://tests.stockfishchess.org/tests/view/6740a63286d5ee47d953f656 LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 122336 W: 31499 L: 31372 D: 59465 Ptnml(0-2): 336, 14535, 31301, 14658, 338 Passed LTC-non regression https://tests.stockfishchess.org/tests/view/67419bae86d5ee47d953f7b6 LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 101400 W: 25870 L: 25731 D: 49799 Ptnml(0-2): 78, 11109, 28166, 11290, 57 closes https://github.com/official-stockfish/Stockfish/pull/5698 Bench: 1215483 --- src/search.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5209bd07..8ebbef5b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1434,7 +1434,8 @@ moves_loop: // When in check, search starts here && ((bestValue < ss->staticEval && bestValue < beta) // negative correction & no fail high || (bestValue > ss->staticEval && bestMove))) // positive correction & no fail low { - const auto m = (ss - 1)->currentMove; + const auto m = (ss - 1)->currentMove; + static const int nonPawnWeight = 154; auto bonus = std::clamp(int(bestValue - ss->staticEval) * depth / 8, -CORRECTION_HISTORY_LIMIT / 4, 
CORRECTION_HISTORY_LIMIT / 4); @@ -1443,9 +1444,9 @@ moves_loop: // When in check, search starts here thisThread->majorPieceCorrectionHistory[us][major_piece_index(pos)] << bonus * 162 / 128; thisThread->minorPieceCorrectionHistory[us][minor_piece_index(pos)] << bonus * 148 / 128; thisThread->nonPawnCorrectionHistory[WHITE][us][non_pawn_index(pos)] - << bonus * 122 / 128; + << bonus * nonPawnWeight / 128; thisThread->nonPawnCorrectionHistory[BLACK][us][non_pawn_index(pos)] - << bonus * 185 / 128; + << bonus * nonPawnWeight / 128; if (m.is_ok()) (*(ss - 2)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()] << bonus; From 1f9404434dcfd1013e20266a79dfed5d0271294a Mon Sep 17 00:00:00 2001 From: Carlos Esparza Date: Sun, 24 Nov 2024 16:17:42 -0800 Subject: [PATCH 310/315] Simplify picking of evasion moves Sort evasions before we start returning them in next_move() (just like every other kind of move) instead of looking for the biggest element on every call to next_move(). The bench number changes because the old method is not equivalent to a stable sort. 
Passed STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 132064 W: 34318 L: 34204 D: 63542 Ptnml(0-2): 392, 15522, 34106, 15604, 408 https://tests.stockfishchess.org/tests/view/6743fee086d5ee47d953f9ca Passed LTC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 97542 W: 24899 L: 24757 D: 47886 Ptnml(0-2): 63, 10646, 27193, 10824, 45 https://tests.stockfishchess.org/tests/view/674509cd86d5ee47d953fb96 closes https://github.com/official-stockfish/Stockfish/pull/5700 Bench: 1094825 --- AUTHORS | 1 + src/movepick.cpp | 29 ++++++++++------------------- src/movepick.h | 7 +------ 3 files changed, 12 insertions(+), 25 deletions(-) diff --git a/AUTHORS b/AUTHORS index 31a64c17..ddc53ec0 100644 --- a/AUTHORS +++ b/AUTHORS @@ -45,6 +45,7 @@ Bruno de Melo Costa (BM123499) Bruno Pellanda (pellanda) Bryan Cross (crossbr) candirufish +Carlos Esparza Sánchez (ces42) Chess13234 Chris Cain (ceebo) Ciekce diff --git a/src/movepick.cpp b/src/movepick.cpp index df722ece..96f03171 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -18,11 +18,9 @@ #include "movepick.h" -#include #include #include #include -#include #include "bitboard.h" #include "position.h" @@ -199,19 +197,13 @@ void MovePicker::score() { // Returns the next move satisfying a predicate function. // This never returns the TT move, as it was emitted before. -template +template Move MovePicker::select(Pred filter) { - while (cur < endMoves) - { - if constexpr (T == Best) - std::swap(*cur, *std::max_element(cur, endMoves)); - + for (; cur < endMoves; ++cur) if (*cur != ttMove && filter()) return *cur++; - cur++; - } return Move::none(); } @@ -245,7 +237,7 @@ top: goto top; case GOOD_CAPTURE : - if (select([&]() { + if (select([&]() { // Move losing capture to endBadCaptures to be tried later return pos.see_ge(*cur, -cur->value / 18) ? 
true : (*endBadCaptures++ = *cur, false); @@ -269,7 +261,7 @@ top: [[fallthrough]]; case GOOD_QUIET : - if (!skipQuiets && select([]() { return true; })) + if (!skipQuiets && select([]() { return true; })) { if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth)) return *(cur - 1); @@ -286,7 +278,7 @@ top: [[fallthrough]]; case BAD_CAPTURE : - if (select([]() { return true; })) + if (select([]() { return true; })) return *(cur - 1); // Prepare the pointers to loop over the bad quiets @@ -298,7 +290,7 @@ top: case BAD_QUIET : if (!skipQuiets) - return select([]() { return true; }); + return select([]() { return true; }); return Move::none(); @@ -307,17 +299,16 @@ top: endMoves = generate(pos, cur); score(); + partial_insertion_sort(cur, endMoves, std::numeric_limits::min()); ++stage; [[fallthrough]]; case EVASION : - return select([]() { return true; }); + case QCAPTURE : + return select([]() { return true; }); case PROBCUT : - return select([&]() { return pos.see_ge(*cur, threshold); }); - - case QCAPTURE : - return select([]() { return true; }); + return select([&]() { return pos.see_ge(*cur, threshold); }); } assert(false); diff --git a/src/movepick.h b/src/movepick.h index 0278b70e..ab4e832f 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -35,11 +35,6 @@ class Position; // a cut-off first. 
class MovePicker { - enum PickType { - Next, - Best - }; - public: MovePicker(const MovePicker&) = delete; MovePicker& operator=(const MovePicker&) = delete; @@ -57,7 +52,7 @@ class MovePicker { void skip_quiet_moves(); private: - template + template Move select(Pred); template void score(); From 6a8478c6adaf9fda6b885ea74e510910f5618c41 Mon Sep 17 00:00:00 2001 From: Shawn Xu Date: Fri, 22 Nov 2024 17:13:00 -0800 Subject: [PATCH 311/315] Simplify Prior Capture Countermove Bonus Passed Non-regression STC: LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 184032 W: 47626 L: 47568 D: 88838 Ptnml(0-2): 590, 21808, 47238, 21714, 666 https://tests.stockfishchess.org/tests/view/67412c7686d5ee47d953f743 Passed Non-regression LTC: LLR: 2.96 (-2.94,2.94) <-1.75,0.25> Total: 169218 W: 43395 L: 43323 D: 82500 Ptnml(0-2): 302, 18567, 46791, 18655, 294 https://tests.stockfishchess.org/tests/view/6743b7e086d5ee47d953f9a6 closes https://github.com/official-stockfish/Stockfish/pull/5701 Bench: 1130692 --- src/search.cpp | 12 +++++------- src/search.h | 3 +-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 8ebbef5b..149222fc 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -888,8 +888,7 @@ Value Search::Worker::search( // Prefetch the TT entry for the resulting position prefetch(tt.first_entry(pos.key_after(move))); - ss->currentMove = move; - ss->capturedPiece = captured; + ss->currentMove = move; ss->continuationHistory = &this->continuationHistory[ss->inCheck][true][pos.moved_piece(move)][move.to_sq()]; ss->continuationCorrectionHistory = @@ -1138,8 +1137,7 @@ moves_loop: // When in check, search starts here prefetch(tt.first_entry(pos.key_after(move))); // Update the current move (this must be done after singular extension search) - ss->currentMove = move; - ss->capturedPiece = pos.piece_on(move.to_sq()); + ss->currentMove = move; ss->continuationHistory = 
&thisThread->continuationHistory[ss->inCheck][capture][movedPiece][move.to_sq()]; ss->continuationCorrectionHistory = @@ -1403,7 +1401,8 @@ moves_loop: // When in check, search starts here else if (priorCapture && prevSq != SQ_NONE) { // bonus for prior countermoves that caused the fail low - Piece capturedPiece = (ss - 1)->capturedPiece; + Piece capturedPiece = pos.captured_piece(); + assert(capturedPiece != NO_PIECE); thisThread->captureHistory[pos.piece_on(prevSq)][prevSq][type_of(capturedPiece)] << stat_bonus(depth) * 2; } @@ -1653,8 +1652,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) prefetch(tt.first_entry(pos.key_after(move))); // Update the current move - ss->currentMove = move; - ss->capturedPiece = pos.piece_on(move.to_sq()); + ss->currentMove = move; ss->continuationHistory = &thisThread ->continuationHistory[ss->inCheck][capture][pos.moved_piece(move)][move.to_sq()]; diff --git a/src/search.h b/src/search.h index 7868f607..b618855b 100644 --- a/src/search.h +++ b/src/search.h @@ -66,7 +66,6 @@ struct Stack { CorrectionHistory* continuationCorrectionHistory; int ply; Move currentMove; - Piece capturedPiece; Move excludedMove; Value staticEval; int statScore; @@ -357,4 +356,4 @@ class Worker { } // namespace Stockfish -#endif // #ifndef SEARCH_H_INCLUDED \ No newline at end of file +#endif // #ifndef SEARCH_H_INCLUDED From e8d2ba194a563b8c8dc1b9ae603b6b9a45c93567 Mon Sep 17 00:00:00 2001 From: xu-shawn <50402888+xu-shawn@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:17:58 -0800 Subject: [PATCH 312/315] Add Leela Data Attribution closes https://github.com/official-stockfish/Stockfish/pull/5705 No functional change --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 25da319d..621f1d13 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,11 @@ where the source code can be found) to generate the exact binary you are distributing. 
If you make any changes to the source code, these changes must also be made available under GPL v3. +## Acknowledgements + +Stockfish uses neural networks trained on [data provided by the Leela Chess Zero +project][lc0-data-link], which is made available under the [Open Database License][odbl-link] (ODbL). + [authors-link]: https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS [build-link]: https://github.com/official-stockfish/Stockfish/actions/workflows/stockfish.yml @@ -144,6 +149,8 @@ also be made available under GPL v3. [wiki-uci-link]: https://github.com/official-stockfish/Stockfish/wiki/UCI-&-Commands [wiki-usage-link]: https://github.com/official-stockfish/Stockfish/wiki/Download-and-usage [worker-link]: https://github.com/official-stockfish/fishtest/wiki/Running-the-worker +[lc0-data-link]: https://storage.lczero.org/files/training_data +[odbl-link]: https://opendatacommons.org/licenses/odbl/odbl-10.txt [build-badge]: https://img.shields.io/github/actions/workflow/status/official-stockfish/Stockfish/stockfish.yml?branch=master&style=for-the-badge&label=stockfish&logo=github [commits-badge]: https://img.shields.io/github/commits-since/official-stockfish/Stockfish/latest?style=for-the-badge From afaf3a0f2a06918e4c046e27743cbe71befb3216 Mon Sep 17 00:00:00 2001 From: Michael Chaly Date: Tue, 3 Dec 2024 09:18:27 +0300 Subject: [PATCH 313/315] Refine statscore for captures Continuation of previous attempts there. Now instead of using capture history with a static offset also add the value of the captured piece in the same way as it is used in movepicker. 
Passed STC: https://tests.stockfishchess.org/tests/view/674aa3d386d5ee47d95404aa LLR: 2.93 (-2.94,2.94) <0.00,2.00> Total: 116480 W: 30433 L: 29999 D: 56048 Ptnml(0-2): 361, 13720, 29662, 14118, 379 Passed LTC: https://tests.stockfishchess.org/tests/view/674c4b2d86d5ee47d954073f LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 133542 W: 34365 L: 33847 D: 65330 Ptnml(0-2): 78, 14585, 36934, 15089, 85 closes https://github.com/official-stockfish/Stockfish/pull/5706 Bench: 934447 --- src/search.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 149222fc..3ce30b81 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1180,7 +1180,10 @@ moves_loop: // When in check, search starts here r -= 1879; if (capture) - ss->statScore = 0; + ss->statScore = + 7 * int(PieceValue[pos.captured_piece()]) + + thisThread->captureHistory[movedPiece][move.to_sq()][type_of(pos.captured_piece())] + - 5000; else ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + (*contHist[0])[movedPiece][move.to_sq()] From a8b6bf1b1a978775ad15ae677d8d425ccd05304b Mon Sep 17 00:00:00 2001 From: mstembera Date: Sat, 7 Dec 2024 15:28:02 -0800 Subject: [PATCH 314/315] Small Major/Minor piece key simplification/optimization. 
closes https://github.com/official-stockfish/Stockfish/pull/5710 No functional change --- src/position.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/position.cpp b/src/position.cpp index bab7a1fc..1b1c0269 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -360,7 +360,7 @@ void Position::set_state() const { { st->nonPawnMaterial[color_of(pc)] += PieceValue[pc]; - if (type_of(pc) == QUEEN || type_of(pc) == ROOK) + if (type_of(pc) >= ROOK) st->majorPieceKey ^= Zobrist::psq[pc][s]; else @@ -759,7 +759,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { st->nonPawnMaterial[them] -= PieceValue[captured]; st->nonPawnKey[them] ^= Zobrist::psq[captured][capsq]; - if (type_of(captured) == QUEEN || type_of(captured) == ROOK) + if (type_of(captured) >= ROOK) st->majorPieceKey ^= Zobrist::psq[captured][capsq]; else @@ -844,7 +844,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { st->materialKey ^= Zobrist::psq[promotion][pieceCount[promotion] - 1] ^ Zobrist::psq[pc][pieceCount[pc]]; - if (promotionType == QUEEN || promotionType == ROOK) + if (promotionType >= ROOK) st->majorPieceKey ^= Zobrist::psq[promotion][to]; else @@ -871,7 +871,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { st->minorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; } - else if (type_of(pc) == QUEEN || type_of(pc) == ROOK) + else if (type_of(pc) >= ROOK) st->majorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; else From cf10644d6e2592e663e48b3d41dae07e7294166e Mon Sep 17 00:00:00 2001 From: Nonlinear2 <131959792+Nonlinear2@users.noreply.github.com> Date: Sun, 8 Dec 2024 22:24:29 +0100 Subject: [PATCH 315/315] Fix duplicate code (#5711) closes https://github.com/official-stockfish/Stockfish/pull/5711 No functional change --- src/search.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 3ce30b81..e352c96e 100644 --- 
a/src/search.cpp +++ b/src/search.cpp @@ -1041,9 +1041,7 @@ moves_loop: // When in check, search starts here { if (bestValue <= futilityValue && !is_decisive(bestValue) && !is_win(futilityValue)) - if (bestValue <= futilityValue && !is_decisive(bestValue) - && !is_win(futilityValue)) - bestValue = futilityValue; + bestValue = futilityValue; continue; }