From 2deb08a52946379d4cebb1082e5d740d1d027122 Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Tue, 18 Aug 2020 18:54:28 +0800 Subject: [PATCH 01/52] Reintroduce last captures extension STC: LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 34840 W: 3834 L: 3682 D: 27324 Ptnml(0-2): 153, 2767, 11455, 2865, 180 https://tests.stockfishchess.org/tests/view/5f3bb380b38d442594aabefc LTC: LLR: 2.95 (-2.94,2.94) {0.25,1.75} Total: 15832 W: 890 L: 776 D: 14166 Ptnml(0-2): 17, 669, 6429, 785, 16 https://tests.stockfishchess.org/tests/view/5f3c46a0a95672ddd56c632a closes https://github.com/official-stockfish/Stockfish/pull/3028 see also https://github.com/official-stockfish/Stockfish/pull/3020 Bench: 4348811 --- src/search.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index 1d5bc5f7..7c839dfc 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1122,6 +1122,11 @@ moves_loop: // When in check, search starts from here && (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move))) extension = 1; + // Last captures extension + else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg + && pos.non_pawn_material() <= 2 * RookValueMg) + extension = 1; + // Castling extension if ( type_of(move) == CASTLING && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2) From a1ad8604a11459a94189f857e368d0fbb72da25d Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 19 Aug 2020 19:21:41 +0200 Subject: [PATCH 02/52] Send error message as an UCI info string some GUIs do not show the error message when the engine terminates in the no-net case, as it is sent to cerr. Instead send it as an info string, which the GUI will more likely display. closes https://github.com/official-stockfish/Stockfish/pull/3031 No functional change. 
--- src/evaluate.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 1bd89353..c84d894f 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -53,10 +53,11 @@ namespace Eval { UCI::OptionsMap defaults; UCI::init(defaults); - std::cerr << "NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully. " - << "These network evaluation parameters must be available, and compatible with this version of the code. " - << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. " - << "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << std::endl; + sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl; + sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl; + sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl; + sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl; + sync_cout << "info string ERROR: The engine will be terminated now." 
<< sync_endl; std::exit(EXIT_FAILURE); } From daac86691de55fe388b9b727794c7d27f2b90d5c Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Thu, 20 Aug 2020 14:24:49 +0200 Subject: [PATCH 03/52] Set Use NNUE by default to true Since the initial stages of the merge, progress has been made so that this seems the best option now: * NNUE is clearly stronger on most relevant hardware and time controls * All of our CI and testing infrastructure has been adjusted * The default net is easy to get (further ideas #3030) fixes https://github.com/official-stockfish/Stockfish/issues/2861 closes https://github.com/official-stockfish/Stockfish/pull/3033 No functional change. --- src/ucioption.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 2b66a475..ec83c7c8 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -78,7 +78,7 @@ void init(OptionsMap& o) { o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); - o["Use NNUE"] << Option(false, on_use_NNUE); + o["Use NNUE"] << Option(true, on_use_NNUE); // The default must follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); From 8b45b1c4907b4b2186441e02edd3b0c37f8b3269 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Fri, 21 Aug 2020 07:42:19 +0200 Subject: [PATCH 04/52] Deal with very old linux kernels MADV_HUGEPAGE might not be available, for kernels before 2.6.38 (released 2011). Just skip the madvise. 
closes https://github.com/official-stockfish/Stockfish/pull/3039 No functional change --- src/misc.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/misc.cpp b/src/misc.cpp index 459ea100..56a3dcad 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -367,7 +367,9 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment if (posix_memalign(&mem, alignment, size)) mem = nullptr; +#if defined(MADV_HUGEPAGE) madvise(mem, allocSize, MADV_HUGEPAGE); +#endif return mem; } From 15abcaedc1e32d4de913a2f7dea12578912371b7 Mon Sep 17 00:00:00 2001 From: gsobala Date: Fri, 21 Aug 2020 11:28:53 +0100 Subject: [PATCH 05/52] Update Makefile for macOS Changes to deal with compilation (particularly profile-build) on macOS. (1) The default toolchain has gcc masquerading as clang, the previous Makefile was not picking up the required changes to the different profiling tools. (2) The previous Makefile test for gccisclang occurred before a potential overwrite of CXX by COMPCXX (3) llvm-profdata no longer runs as a command on macOS and instead is invoked by ``xcrun llvm-profdata`` (4) Needs to support use of true gcc using e.g. ``make build ... 
COMPCXX=g++-10`` (5) enable profile-build in travis for macOS closes https://github.com/official-stockfish/Stockfish/pull/3043 No functional change --- .travis.yml | 3 ++- AUTHORS | 1 + src/Makefile | 16 ++++++++++++---- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 12596f1e..a029c4fc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -71,7 +71,8 @@ script: - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" && "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi + # workaround: exclude a custom version of llvm+clang, which doesn't find llvm-profdata on ubuntu + - if [[ "$TRAVIS_OS_NAME" != "linux" || "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi # compile only for some more advanced architectures (might not run in travis) - make clean && make -j2 ARCH=x86-64-avx2 build diff --git a/AUTHORS b/AUTHORS index d8f4d30e..c96f870a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -59,6 +59,7 @@ Fauzi Akram Dabat (FauziAkram) Felix Wittmann gamander Gary Heckman (gheckman) +George Sobala (gsobala) gguliash Gian-Carlo Pascutto (gcp) Gontran Lemaire (gonlem) diff --git a/src/Makefile b/src/Makefile index 79c7333a..b969ba04 100644 --- a/src/Makefile +++ b/src/Makefile @@ -302,9 +302,6 @@ ifeq ($(COMP),gcc) ifneq ($(KERNEL),Darwin) LDFLAGS += -Wl,--no-as-needed endif - - gccversion = $(shell $(CXX) --version) - gccisclang = $(findstring clang,$(gccversion)) endif ifeq ($(COMP),mingw) @@ -376,6 +373,7 @@ endif ifeq ($(KERNEL),Darwin) 
CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 + XCRUN = xcrun endif # To cross-compile for Android, NDK version r21 or later is recommended. @@ -407,6 +405,16 @@ ifdef COMPCXX CXX=$(COMPCXX) endif +### Sometimes gcc is really clang +ifeq ($(COMP),gcc) + gccversion = $(shell $(CXX) --version) + gccisclang = $(findstring clang,$(gccversion)) + ifneq ($(gccisclang),) + profile_make = clang-profile-make + profile_use = clang-profile-use + endif +endif + ### On mingw use Windows threads, otherwise POSIX ifneq ($(comp),mingw) # On Android Bionic's C library comes with its own pthread implementation bundled in @@ -798,7 +806,7 @@ clang-profile-make: all clang-profile-use: - llvm-profdata merge -output=stockfish.profdata *.profraw + $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ EXTRALDFLAGS='-fprofile-use ' \ From e64b957274b94e89ad1a6e3ec4571c9082246a0a Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Fri, 21 Aug 2020 09:24:25 +0200 Subject: [PATCH 06/52] Simplify away internal iterative deepening Remove the iterative deepening step. Instead, employ a depth reduction if the position is not in TT and on the PV. 
STC https://tests.stockfishchess.org/tests/view/5f3ce6eaa95672ddd56c637e LLR: 2.97 (-2.94,2.94) {-0.50,1.50} Total: 41096 W: 4421 L: 4257 D: 32418 Ptnml(0-2): 207, 3259, 13460, 3407, 215 LTC (old) https://tests.stockfishchess.org/tests/view/5f3d7d4fa95672ddd56c640b LLR: 2.92 (-2.94,2.94) {-1.50,0.50} Total: 26032 W: 1320 L: 1309 D: 23403 Ptnml(0-2): 22, 1152, 10654, 1169, 19 LTC (new) https://tests.stockfishchess.org/tests/view/5f3e31e0a95672ddd56c6464 LLR: 2.95 (-2.94,2.94) {-0.75,0.25} Total: 34160 W: 1844 L: 1766 D: 30550 Ptnml(0-2): 33, 1533, 13876, 1599, 39 bench: 3849173 --- src/search.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 7c839dfc..ba13680c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -939,15 +939,11 @@ namespace { } } - // Step 11. Internal iterative deepening (~1 Elo) - if (depth >= 7 && !ttMove) - { - search(pos, ss, alpha, beta, depth - 7, cutNode); - - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ttHit ? tte->move() : MOVE_NONE; - } + // Step 11. If the position is not in TT, decrease depth by 2 + if ( PvNode + && depth >= 6 + && !ttMove) + depth -= 2; moves_loop: // When in check, search starts from here From cbcb05ca092160137c166f84e7e9da3d6bb4e2d3 Mon Sep 17 00:00:00 2001 From: MJZ1977 <37274752+MJZ1977@users.noreply.github.com> Date: Fri, 21 Aug 2020 10:57:34 +0200 Subject: [PATCH 07/52] Display classic and NNUE evaluation in trace mode show both the classical and NNUE evaluation, as well as the Final evaluation. closes https://github.com/official-stockfish/Stockfish/pull/3042 No functional change. 
--- src/evaluate.cpp | 65 ++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index c84d894f..c66938d6 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -972,42 +972,47 @@ std::string Eval::trace(const Position& pos) { Value v; - if (Eval::useNNUE) - { - v = NNUE::evaluate(pos); - } - else - { - std::memset(scores, 0, sizeof(scores)); + std::memset(scores, 0, sizeof(scores)); - pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt + pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt - v = Evaluation(pos).value(); + v = Evaluation(pos).value(); - ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) - << " Term | White | Black | Total \n" - << " | MG EG | MG EG | MG EG \n" - << " ------------+-------------+-------------+------------\n" - << " Material | " << Term(MATERIAL) - << " Imbalance | " << Term(IMBALANCE) - << " Pawns | " << Term(PAWN) - << " Knights | " << Term(KNIGHT) - << " Bishops | " << Term(BISHOP) - << " Rooks | " << Term(ROOK) - << " Queens | " << Term(QUEEN) - << " Mobility | " << Term(MOBILITY) - << " King safety | " << Term(KING) - << " Threats | " << Term(THREAT) - << " Passed | " << Term(PASSED) - << " Space | " << Term(SPACE) - << " Winnable | " << Term(WINNABLE) - << " ------------+-------------+-------------+------------\n" - << " Total | " << Term(TOTAL); - } + ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) + << " Term | White | Black | Total \n" + << " | MG EG | MG EG | MG EG \n" + << " ------------+-------------+-------------+------------\n" + << " Material | " << Term(MATERIAL) + << " Imbalance | " << Term(IMBALANCE) + << " Pawns | " << Term(PAWN) + << " Knights | " << Term(KNIGHT) + << " Bishops | " << Term(BISHOP) + << " Rooks | " << Term(ROOK) + << " Queens | " << Term(QUEEN) + << " Mobility | " << Term(MOBILITY) + << " King safety | " << 
Term(KING) + << " Threats | " << Term(THREAT) + << " Passed | " << Term(PASSED) + << " Space | " << Term(SPACE) + << " Winnable | " << Term(WINNABLE) + << " ------------+-------------+-------------+------------\n" + << " Total | " << Term(TOTAL); v = pos.side_to_move() == WHITE ? v : -v; - ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; + ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n"; + + if (Eval::useNNUE) + { + v = NNUE::evaluate(pos); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "\nNNUE evaluation: " << to_cp(v) << " (white side)\n"; + } + + v = evaluate(pos); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; + return ss.str(); } From 34f67c57223d73ad40d583ccc033c75eb0df2453 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Fri, 21 Aug 2020 22:10:55 +0200 Subject: [PATCH 08/52] Explicitly rely on pthreads if possible allows us to set the needed stacksize on thread creation. Useful for environments with too small a default stack size (e.g. Alpine Linux with musl). Passed STC, no regression: LLR: 2.96 (-2.94,2.94) {-1.25,0.25} Total: 17816 W: 1344 L: 1275 D: 15197 Ptnml(0-2): 30, 1057, 6682, 1092, 47 https://tests.stockfishchess.org/tests/view/5f402b5587a5c3c63d8f534d closes https://github.com/official-stockfish/Stockfish/pull/3047 fixes https://github.com/official-stockfish/Stockfish/issues/3041 No functional change. 
--- src/Makefile | 1 + src/thread_win32_osx.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index b969ba04..74ef87b9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -417,6 +417,7 @@ endif ### On mingw use Windows threads, otherwise POSIX ifneq ($(comp),mingw) + CXXFLAGS += -DUSE_PTHREADS # On Android Bionic's C library comes with its own pthread implementation bundled in ifneq ($(OS),Android) # Haiku has pthreads in its libroot, so only link it in on other platforms diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h index c4b55a48..75ef5d9a 100644 --- a/src/thread_win32_osx.h +++ b/src/thread_win32_osx.h @@ -27,7 +27,7 @@ /// The implementation calls pthread_create() with the stack size parameter /// equal to the linux 8MB default, on platforms that support it. -#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) +#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS) #include From 3542033342f15625f808013b69aa8c2d274a2f91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Nicolet?= Date: Sat, 22 Aug 2020 11:37:53 +0200 Subject: [PATCH 09/52] Instructions to build on older Macintosh In recent Macs, it is possible to use the Clang compiler provided by Apple to compile Stockfish out of the box, and this is the method used by default in our Makefile (the Makefile sets the macosx-version-min=10.14 flag to select the right libc++ library for the Clang compiler with recent c++17 support). But it is quite possible to compile and run Stockfish on older Macs! Below we describe a method to install a recent GNU compiler on these Macs, to get the c++17 support. We have tested the following procedure to install gcc10 on machines running Mac OS 10.7, Mac OS 10.9 and Mac OS 10.13: 1) install XCode for your machine. 
2) install Apple command-line developer tools for XCode, by typing the following command in a Terminal: ``` sudo xcode-select --install ``` 3) go to the Stockfish "src" directory, then try a default build and run Stockfish: ``` make clean make build make net ./stockfish ``` 4) if step 3 worked, congrats! You have a compiler recent enough on your Mac to compile Stockfish. If not, continue with step 5 to install GNU gcc10 :-) 5) install the MacPorts package manager (https://www.macports.org/install.php), for instance using the fast method in the "macOS Package (.pkg) Installer" section of the page. 6) use the "port" command to install the gcc10 package of MacPorts by typing the following command: ``` sudo port install gcc10 ``` With this step, MacPorts will install the gcc10 compiler under the name "g++-mp-10" in the /opt/local/bin directory: ``` which g++-mp-10 /opt/local/bin/g++-mp-10 <--- answer ``` 7) You can now go back to the "src" directory of Stockfish, and try to build Stockfish by pointing at the right compiler: ``` make clean make build COMP=gcc COMPCXX=/opt/local/bin/g++-mp-10 make net ./stockfish ``` 8) Enjoy Stockfish on Macintosh! 
See this pull request for further discussion: https://github.com/official-stockfish/Stockfish/pull/3049 No functional change --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 74ef87b9..b0274504 100644 --- a/src/Makefile +++ b/src/Makefile @@ -370,7 +370,7 @@ else endif endif -ifeq ($(KERNEL),Darwin) +ifeq ($(KERNEL),Darwin) CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 XCRUN = xcrun From 5f1843c9cb55afcd3fb1da9e9dc4b0092f25d9f0 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sat, 11 Jul 2020 16:59:33 +0200 Subject: [PATCH 10/52] Small trivial cleanups closes https://github.com/official-stockfish/Stockfish/pull/2801 No functional change --- README.md | 53 +++++++++++++++++++++++++----------------------- src/Makefile | 2 +- src/bitboard.cpp | 12 ++++++++++- src/bitboard.h | 10 --------- src/evaluate.cpp | 17 ++++++++-------- src/material.cpp | 2 +- src/misc.cpp | 17 ++++++++-------- src/misc.h | 8 -------- src/movegen.cpp | 2 +- src/movepick.cpp | 14 ++++++------- src/movepick.h | 8 ++++---- src/pawns.cpp | 2 +- src/position.cpp | 4 ++-- src/position.h | 5 +++++ src/search.cpp | 15 +++++++------- src/timeman.cpp | 18 ++++++++-------- src/uci.cpp | 2 +- 17 files changed, 96 insertions(+), 95 deletions(-) diff --git a/README.md b/README.md index 7b6ddf4c..2cc88bf4 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,17 @@ [![Build Status](https://ci.appveyor.com/api/projects/status/github/official-stockfish/Stockfish?branch=master&svg=true)](https://ci.appveyor.com/project/mcostalba/stockfish/branch/master) [Stockfish](https://stockfishchess.org) is a free, powerful UCI chess engine -derived from Glaurung 2.1. It features two evaluation functions, the classical -evaluation based on handcrafted terms, and the NNUE evaluation based on -efficiently updateable neural networks. 
The classical evaluation runs efficiently -on most 64bit CPU architectures, while the NNUE evaluation benefits strongly from the -vector intrinsics available on modern CPUs (avx2 or similar). +derived from Glaurung 2.1. Stockfish is not a complete chess program and requires a +UCI-compatible graphical user interface (GUI) (e.g. XBoard with PolyGlot, Scid, +Cute Chess, eboard, Arena, Sigma Chess, Shredder, Chess Partner or Fritz) in order +to be used comfortably. Read the documentation for your GUI of choice for information +about how to use Stockfish with it. -Stockfish is not a complete chess program and requires a -UCI-compatible GUI (e.g. XBoard with PolyGlot, Scid, Cute Chess, eboard, Arena, -Sigma Chess, Shredder, Chess Partner or Fritz) in order to be used comfortably. -Read the documentation for your GUI of choice for information about how to use -Stockfish with it. +The Stockfish engine features two evaluation functions for chess, the classical +evaluation based on handcrafted terms, and the NNUE evaluation based on efficiently +updateable neural networks. The classical evaluation runs efficiently on most 64bit +CPU architectures, while the NNUE evaluation benefits strongly from the vector +intrinsics available on modern CPUs (avx2 or similar). ## Files @@ -28,10 +28,13 @@ This distribution of Stockfish consists of the following files: * src, a subdirectory containing the full source code, including a Makefile that can be used to compile Stockfish on Unix-like systems. -To use the NNUE evaluation an additional data file with neural network parameters -needs to be downloaded. The filename for the default set can be found as the default -value of the `EvalFile` UCI option, with the format -`nn-[SHA256 first 12 digits].nnue` (e.g. nn-c157e0a5755b.nnue). This file can be downloaded from + * a file with the .nnue extension, storing the neural network for the NNUE + evaluation. 
+ +Note: to use the NNUE evaluation, the additional data file with neural network parameters +needs to be downloaded. The filename for the default net can be found as the default +value of the `EvalFile` UCI option, with the format `nn-[SHA256 first 12 digits].nnue` +(for instance, `nn-c157e0a5755b.nnue`). This file can be downloaded from ``` https://tests.stockfishchess.org/api/nn/[filename] ``` @@ -64,14 +67,6 @@ Currently, Stockfish has the following UCI options: The name of the file of the NNUE evaluation parameters. Depending on the GUI the filename should include the full path to the folder/directory that contains the file. - * #### Contempt - A positive value for contempt favors middle game positions and avoids draws, - effective for the classical evaluation only. - - * #### Analysis Contempt - By default, contempt is set to prefer the side to move. Set this option to "White" - or "Black" to analyse with contempt for that side, or "Off" to disable contempt. - * #### UCI_AnalyseMode An option handled by your GUI. @@ -120,6 +115,14 @@ Currently, Stockfish has the following UCI options: Limit Syzygy tablebase probing to positions with at most this many pieces left (including kings and pawns). + * #### Contempt + A positive value for contempt favors middle game positions and avoids draws, + effective for the classical evaluation only. + + * #### Analysis Contempt + By default, contempt is set to prefer the side to move. Set this option to "White" + or "Black" to analyse with contempt for that side, or "Off" to disable contempt. + * #### Move Overhead Assume a time delay of x ms due to network and GUI overheads. This is useful to avoid losses on time in those cases. @@ -138,7 +141,7 @@ Currently, Stockfish has the following UCI options: * #### Debug Log File Write all communication to and from the engine into a text file. 
-## Classical and NNUE evaluation +## A note on classical and NNUE evaluation Both approaches assign a value to a position that is used in alpha-beta (PVS) search to find the best move. The classical evaluation computes this value as a function @@ -226,6 +229,7 @@ targets with corresponding descriptions. cd src make help make build ARCH=x86-64-modern + make net ``` When not using the Makefile to compile (for instance with Microsoft MSVC) you @@ -237,8 +241,7 @@ compiler you used to create your executable. These informations can be found by typing the following commands in a console: ``` - ./stockfish - compiler + ./stockfish compiler ``` ## Understanding the code base and participating in the project diff --git a/src/Makefile b/src/Makefile index b0274504..74ef87b9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -370,7 +370,7 @@ else endif endif -ifeq ($(KERNEL),Darwin) +ifeq ($(KERNEL),Darwin) CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 XCRUN = xcrun diff --git a/src/bitboard.cpp b/src/bitboard.cpp index f531010c..80206b58 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -39,6 +39,16 @@ namespace { Bitboard BishopTable[0x1480]; // To store bishop attacks void init_magics(PieceType pt, Bitboard table[], Magic magics[]); + +} + + +/// safe_destination() returns the bitboard of target square for the given step +/// from the given square. If the step is off the board, returns empty bitboard. + +inline Bitboard safe_destination(Square s, int step) { + Square to = Square(s + step); + return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0); } @@ -110,7 +120,7 @@ namespace { Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST}; - for(Direction d : (pt == ROOK ? RookDirections : BishopDirections)) + for (Direction d : (pt == ROOK ? 
RookDirections : BishopDirections)) { Square s = sq; while(safe_destination(s, d) && !(occupied & s)) diff --git a/src/bitboard.h b/src/bitboard.h index a899d879..29d8f66d 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -279,16 +279,6 @@ inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); } inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); } -/// safe_destination() returns the bitboard of target square for the given step -/// from the given square. If the step is off the board, returns empty bitboard. - -inline Bitboard safe_destination(Square s, int step) -{ - Square to = Square(s + step); - return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0); -} - - /// attacks_bb(Square) returns the pseudo attacks of the give piece type /// assuming an empty board. diff --git a/src/evaluate.cpp b/src/evaluate.cpp index c66938d6..ce92db9a 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -288,8 +288,8 @@ namespace { attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]); // Init our king safety tables - Square s = make_square(Utility::clamp(file_of(ksq), FILE_B, FILE_G), - Utility::clamp(rank_of(ksq), RANK_2, RANK_7)); + Square s = make_square(std::clamp(file_of(ksq), FILE_B, FILE_G), + std::clamp(rank_of(ksq), RANK_2, RANK_7)); kingRing[Us] = attacks_bb(s) | s; kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them)); @@ -686,8 +686,8 @@ namespace { Square blockSq = s + Up; // Adjust bonus based on the king's proximity - bonus += make_score(0, ( (king_proximity(Them, blockSq) * 19) / 4 - - king_proximity(Us, blockSq) * 2) * w); + bonus += make_score(0, ( king_proximity(Them, blockSq) * 19 / 4 + - king_proximity(Us, blockSq) * 2) * w); // If blockSq is not the queening square then consider also a second push if (r != RANK_7) @@ -731,7 +731,7 @@ namespace { // Evaluation::space() computes a space evaluation for a given side, aiming to improve game - // play in the 
opening. It is based on the number of safe squares on the 4 central files + // play in the opening. It is based on the number of safe squares on the four central files // on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice. // Finally, the space bonus is multiplied by a weight which decreases according to occupancy. @@ -804,7 +804,7 @@ namespace { // Now apply the bonus: note that we find the attacking side by extracting the // sign of the midgame or endgame values, and that we carefully cap the bonus // so that the midgame and endgame scores do not change sign after the bonus. - int u = ((mg > 0) - (mg < 0)) * Utility::clamp(complexity + 50, -abs(mg), 0); + int u = ((mg > 0) - (mg < 0)) * std::clamp(complexity + 50, -abs(mg), 0); int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg)); mg += u; @@ -951,8 +951,8 @@ Value Eval::evaluate(const Position& pos) { // Damp down the evaluation linearly when shuffling v = v * (100 - pos.rule50_count()) / 100; - // Guarantee evalution outside of TB range - v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); + // Guarantee evaluation does not hit the tablebase range + v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); return v; } @@ -1013,6 +1013,5 @@ std::string Eval::trace(const Position& pos) { v = pos.side_to_move() == WHITE ? 
v : -v; ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; - return ss.str(); } diff --git a/src/material.cpp b/src/material.cpp index 0ef9926f..870a5e11 100644 --- a/src/material.cpp +++ b/src/material.cpp @@ -130,7 +130,7 @@ Entry* probe(const Position& pos) { Value npm_w = pos.non_pawn_material(WHITE); Value npm_b = pos.non_pawn_material(BLACK); - Value npm = Utility::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); + Value npm = std::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); // Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME] e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit)); diff --git a/src/misc.cpp b/src/misc.cpp index 56a3dcad..80c436ac 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -328,16 +328,16 @@ void prefetch(void* addr) { #endif -/// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc. -/// Memory allocated with std_aligned_alloc must be freed with std_aligned_free. -/// + +/// std_aligned_alloc() is our wrapper for systems where the c++17 implementation +/// does not guarantee the availability of aligned_alloc(). Memory allocated with +/// std_aligned_alloc() must be freed with std_aligned_free(). void* std_aligned_alloc(size_t alignment, size_t size) { + #if defined(POSIXALIGNEDALLOC) - void *pointer; - if(posix_memalign(&pointer, alignment, size) == 0) - return pointer; - return nullptr; + void *mem; + return posix_memalign(&mem, alignment, size) ? nullptr : mem; #elif defined(_WIN32) return _mm_malloc(size, alignment); #else @@ -346,6 +346,7 @@ void* std_aligned_alloc(size_t alignment, size_t size) { } void std_aligned_free(void* ptr) { + #if defined(POSIXALIGNEDALLOC) free(ptr); #elif defined(_WIN32) @@ -355,7 +356,7 @@ void std_aligned_free(void* ptr) { #endif } -/// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages. 
+/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages. /// The returned pointer is the aligned one, while the mem argument is the one that needs /// to be passed to free. With c++17 some of this functionality could be simplified. diff --git a/src/misc.h b/src/misc.h index eb4e05c0..8ad17b50 100644 --- a/src/misc.h +++ b/src/misc.h @@ -65,14 +65,6 @@ std::ostream& operator<<(std::ostream&, SyncCout); #define sync_cout std::cout << IO_LOCK #define sync_endl std::endl << IO_UNLOCK -namespace Utility { - -/// Clamp a value between lo and hi. Available in c++17. -template constexpr const T& clamp(const T& v, const T& lo, const T& hi) { - return v < lo ? lo : v > hi ? hi : v; -} - -} /// xorshift64star Pseudo-Random Number Generator /// This class is based on original code written and dedicated diff --git a/src/movegen.cpp b/src/movegen.cpp index d74df4c3..3340f65c 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -248,7 +248,7 @@ namespace { *moveList++ = make_move(ksq, pop_lsb(&b)); if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING)) - for(CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) + for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) if (!pos.castling_impeded(cr) && pos.can_castle(cr)) *moveList++ = make(ksq, pos.castling_rook_square(cr)); } diff --git a/src/movepick.cpp b/src/movepick.cpp index 96a44449..153d323e 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -182,7 +182,7 @@ top: --endMoves; ++stage; - /* fallthrough */ + [[fallthrough]]; case REFUTATION: if (select([&](){ return *cur != MOVE_NONE @@ -190,7 +190,7 @@ top: && pos.pseudo_legal(*cur); })) return *(cur - 1); ++stage; - /* fallthrough */ + [[fallthrough]]; case QUIET_INIT: if (!skipQuiets) @@ -203,7 +203,7 @@ top: } ++stage; - /* fallthrough */ + [[fallthrough]]; case QUIET: if ( !skipQuiets @@ -217,7 +217,7 @@ top: endMoves = endBadCaptures; ++stage; - /* fallthrough */ + [[fallthrough]]; case BAD_CAPTURE: return 
select([](){ return true; }); @@ -228,7 +228,7 @@ top: score(); ++stage; - /* fallthrough */ + [[fallthrough]]; case EVASION: return select([](){ return true; }); @@ -246,14 +246,14 @@ top: return MOVE_NONE; ++stage; - /* fallthrough */ + [[fallthrough]]; case QCHECK_INIT: cur = moves; endMoves = generate(pos, cur); ++stage; - /* fallthrough */ + [[fallthrough]]; case QCHECK: return select([](){ return true; }); diff --git a/src/movepick.h b/src/movepick.h index f080935a..97ea5bec 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -86,14 +86,14 @@ enum StatsType { NoCaptures, Captures }; /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards typedef Stats ButterflyHistory; -/// At higher depths LowPlyHistory records successful quiet moves near the root and quiet -/// moves which are/were in the PV (ttPv) -/// It is cleared with each new search and filled during iterative deepening +/// At higher depths LowPlyHistory records successful quiet moves near the root +/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new +/// search and filled during iterative deepening. 
constexpr int MAX_LPH = 4; typedef Stats LowPlyHistory; /// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous -/// move, see www.chessprogramming.org/Countermove_Heuristic +/// move, see www.chessprogramming.org/Countermove_Heuristic typedef Stats CounterMoveHistory; /// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] diff --git a/src/pawns.cpp b/src/pawns.cpp index 868d0c8e..af0f6618 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -219,7 +219,7 @@ Score Entry::evaluate_shelter(const Position& pos, Square ksq) const { Score bonus = make_score(5, 5); - File center = Utility::clamp(file_of(ksq), FILE_B, FILE_G); + File center = std::clamp(file_of(ksq), FILE_B, FILE_G); for (File f = File(center - 1); f <= File(center + 1); ++f) { b = ourPawns & file_bb(f); diff --git a/src/position.cpp b/src/position.cpp index 46e5d78b..02898547 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -1145,8 +1145,8 @@ bool Position::see_ge(Move m, Value threshold) const { // Don't allow pinned pieces to attack (except the king) as long as // there are pinners on their original square. 
- if (st->pinners[~stm] & occupied) - stmAttackers &= ~st->blockersForKing[stm]; + if (pinners(~stm) & occupied) + stmAttackers &= ~blockers_for_king(stm); if (!stmAttackers) break; diff --git a/src/position.h b/src/position.h index a77050eb..5ce17277 100644 --- a/src/position.h +++ b/src/position.h @@ -113,6 +113,7 @@ public: Bitboard checkers() const; Bitboard blockers_for_king(Color c) const; Bitboard check_squares(PieceType pt) const; + Bitboard pinners(Color c) const; bool is_discovery_check_on_king(Color c, Move m) const; // Attacks to/from a given square @@ -309,6 +310,10 @@ inline Bitboard Position::blockers_for_king(Color c) const { return st->blockersForKing[c]; } +inline Bitboard Position::pinners(Color c) const { + return st->pinners[c]; +} + inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; } diff --git a/src/search.cpp b/src/search.cpp index ba13680c..82d8bb9d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -335,7 +335,7 @@ void Thread::search() { // for match (TC 60+0.6) results spanning a wide range of k values. PRNG rng(now()); double floatLevel = Options["UCI_LimitStrength"] ? - Utility::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : + std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : double(Options["Skill Level"]); int intLevel = int(floatLevel) + ((floatLevel - int(floatLevel)) * 1024 > rng.rand() % 1024 ? 1 : 0); @@ -508,7 +508,7 @@ void Thread::search() { { double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue) + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0; - fallingEval = Utility::clamp(fallingEval, 0.5, 1.5); + fallingEval = std::clamp(fallingEval, 0.5, 1.5); // If the bestMove is stable over several iterations, reduce time accordingly timeReduction = lastBestMoveDepth + 9 < completedDepth ? 
1.92 : 0.95; @@ -807,8 +807,9 @@ namespace { && eval <= alpha - RazorMargin) return qsearch(pos, ss, alpha, beta); - improving = (ss-2)->staticEval == VALUE_NONE ? (ss->staticEval > (ss-4)->staticEval - || (ss-4)->staticEval == VALUE_NONE) : ss->staticEval > (ss-2)->staticEval; + improving = (ss-2)->staticEval == VALUE_NONE + ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE + : ss->staticEval > (ss-2)->staticEval; // Step 8. Futility pruning: child node (~50 Elo) if ( !PvNode @@ -879,8 +880,8 @@ namespace { // there and in further interactions with transposition table cutoff depth is set to depth - 3 // because probCut search has depth set to depth - 4 but we also do a move before it // so effective depth is equal to depth - 3 - && !( ttHit - && tte->depth() >= depth - 3 + && !( ttHit + && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { @@ -1238,7 +1239,7 @@ moves_loop: // When in check, search starts from here r++; } - Depth d = Utility::clamp(newDepth - r, 1, newDepth); + Depth d = std::clamp(newDepth - r, 1, newDepth); value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); diff --git a/src/timeman.cpp b/src/timeman.cpp index df4ba9b2..6d9c95ef 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -38,9 +38,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { TimePoint slowMover = TimePoint(Options["Slow Mover"]); TimePoint npmsec = TimePoint(Options["nodestime"]); - // opt_scale is a percentage of available time to use for the current move. - // max_scale is a multiplier applied to optimumTime. - double opt_scale, max_scale; + // optScale is a percentage of available time to use for the current move. + // maxScale is a multiplier applied to optimumTime. + double optScale, maxScale; // If we have to play in 'nodes as time' mode, then convert from time // to nodes, and use resulting values in time management formulas. 
@@ -75,22 +75,22 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { // game time for the current move, so also cap to 20% of available game time. if (limits.movestogo == 0) { - opt_scale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0, + optScale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0, 0.2 * limits.time[us] / double(timeLeft)); - max_scale = std::min(7.0, 4.0 + ply / 12.0); + maxScale = std::min(7.0, 4.0 + ply / 12.0); } // x moves in y seconds (+ z increment) else { - opt_scale = std::min((0.8 + ply / 128.0) / mtg, + optScale = std::min((0.8 + ply / 128.0) / mtg, 0.8 * limits.time[us] / double(timeLeft)); - max_scale = std::min(6.3, 1.5 + 0.11 * mtg); + maxScale = std::min(6.3, 1.5 + 0.11 * mtg); } // Never use more than 80% of the available time for this move - optimumTime = TimePoint(opt_scale * timeLeft); - maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, max_scale * optimumTime)); + optimumTime = TimePoint(optScale * timeLeft); + maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime)); if (Options["Ponder"]) optimumTime += optimumTime / 4; diff --git a/src/uci.cpp b/src/uci.cpp index d6486320..bc0ee0a0 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -211,7 +211,7 @@ namespace { double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; // Transform eval to centipawns with limited range - double x = Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); + double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); // Return win rate in per mille (rounded to nearest) return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); From cc9d503ddea998890112efd08fae3705f2727e37 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Sat, 22 Aug 2020 13:36:34 +0200 Subject: [PATCH 11/52] Skip the alignment bug workaround for Clang Clang-10.0.0 poses as gcc-4.2: $ clang++ -E -dM - Date: Sun, 23 Aug 2020 14:22:32 +0300 
Subject: [PATCH 12/52] Introduce movecount pruning for qsearch() If in quiescence search, we assume that we can prune late moves when: a) the move ordering count of the move is: moveCount > abs(depth) + 2 b) we are not in check c) the late move does not give check d) the late move is not an advanced pawn push Modification of an original idea by @VoyagerOne. STC https://tests.stockfishchess.org/tests/view/5f40581787a5c3c63d8f535f LLR: 2.96 (-2.94,2.94) {-0.25,1.25} Total: 132848 W: 14999 L: 14661 D: 103188 Ptnml(0-2): 684, 11242, 42309, 11430, 759 LTC https://tests.stockfishchess.org/tests/view/5f4226da87a5c3c63d8f5412 LLR: 2.98 (-2.94,2.94) {0.25,1.25} Total: 12008 W: 678 L: 551 D: 10779 Ptnml(0-2): 8, 485, 4899, 596, 16 closes https://github.com/official-stockfish/Stockfish/pull/3053 Bench: 3749974 --- src/movepick.h | 2 +- src/search.cpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/movepick.h b/src/movepick.h index 97ea5bec..4c0ad551 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -93,7 +93,7 @@ constexpr int MAX_LPH = 4; typedef Stats LowPlyHistory; /// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous -/// move, see www.chessprogramming.org/Countermove_Heuristic +/// move, see www.chessprogramming.org/Countermove_Heuristic typedef Stats CounterMoveHistory; /// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] diff --git a/src/search.cpp b/src/search.cpp index 82d8bb9d..266e2db3 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1531,6 +1531,10 @@ moves_loop: // When in check, search starts from here { assert(type_of(move) != ENPASSANT); // Due to !pos.advanced_pawn_push + // moveCount pruning + if (moveCount > abs(depth) + 2) + continue; + futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; if (futilityValue <= alpha) @@ -1547,7 +1551,7 @@ moves_loop: // When in check, search starts from here } // Do not search moves with negative SEE values -
if ( !ss->inCheck && !pos.see_ge(move)) + if (!ss->inCheck && !pos.see_ge(move)) continue; // Speculative prefetch as early as possible From e453f09f06f41680ef96f594f593f8de33e62b8f Mon Sep 17 00:00:00 2001 From: George Sobala Date: Mon, 24 Aug 2020 06:37:42 +0100 Subject: [PATCH 13/52] armv8 AArch64 does not require -mfpu=neon -mfpu is not required on AArch64 / armv8 architecture on Linux and throws an error if present. This PR has been tested on gcc and clang on Gentoo-64 and Raspbian-64 on a Raspberry Pi 4, as well as with a cross-compile from Ubuntu (`make clean && make -j build ARCH=armv8 COMP=gcc COMPILER=aarch64-linux-gnu-g++`) fixes https://github.com/official-stockfish/Stockfish/issues/3056 closes https://github.com/official-stockfish/Stockfish/pull/3059 No functional change --- src/Makefile | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Makefile b/src/Makefile index 74ef87b9..3e1b7c35 100644 --- a/src/Makefile +++ b/src/Makefile @@ -241,7 +241,7 @@ ifeq ($(ARCH),armv7-neon) endif ifeq ($(ARCH),armv8) - arch = armv8-a + arch = armv8 prefetch = yes popcnt = yes neon = yes @@ -285,7 +285,7 @@ ifeq ($(COMP),gcc) CXX=g++ CXXFLAGS += -pedantic -Wextra -Wshadow - ifeq ($(arch),$(filter $(arch),armv7 armv8-a)) + ifeq ($(arch),$(filter $(arch),armv7 armv8)) ifeq ($(OS),Android) CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) @@ -387,7 +387,7 @@ ifeq ($(COMP),ndk) CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon STRIP=arm-linux-androideabi-strip endif - ifeq ($(arch),armv8-a) + ifeq ($(arch),armv8) comp=aarch64-linux-android21-clang CXX=aarch64-linux-android21-clang++ STRIP=aarch64-linux-android-strip @@ -476,7 +476,7 @@ endif ### 3.6 popcnt ifeq ($(popcnt),yes) - ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8-a arm64)) + ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) CXXFLAGS += -DUSE_POPCNT else ifeq ($(comp),icc) CXXFLAGS += -msse3 -DUSE_POPCNT @@ -539,9 +539,11 @@ ifeq ($(neon),yes) CXXFLAGS += -DUSE_NEON ifeq
($(KERNEL),Linux) ifneq ($(COMP),ndk) + ifneq ($(arch),armv8) CXXFLAGS += -mfpu=neon endif endif + endif endif ### 3.7 pext @@ -780,7 +782,7 @@ config-sanity: @test "$(optimize)" = "yes" || test "$(optimize)" = "no" @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \ - test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64" + test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" @test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" From 701b2427bd84d112376ce858b66befc5b66c4bb2 Mon Sep 17 00:00:00 2001 From: mstembera Date: Thu, 20 Aug 2020 16:59:27 -0700 Subject: [PATCH 14/52] Support VNNI on 256bit vectors due to downclocking on current chips (tested up to cascade lake) supporting avx512 and vnni512, it is better to use avx2 or vnni256 in multithreaded (in particular hyperthreaded) engine use. In single threaded use, the picture is different. gcc compilation for vnni256 requires a toolchain for gcc >= 9. 
closes https://github.com/official-stockfish/Stockfish/pull/3038 No functional change --- .travis.yml | 6 +++-- src/Makefile | 39 ++++++++++++++++++++++++------ src/nnue/layers/affine_transform.h | 8 +++++- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index a029c4fc..c1e6d6df 100644 --- a/.travis.yml +++ b/.travis.yml @@ -77,8 +77,10 @@ script: # compile only for some more advanced architectures (might not run in travis) - make clean && make -j2 ARCH=x86-64-avx2 build - make clean && make -j2 ARCH=x86-64-bmi2 build - # needs gcc 10 to compile - - if [[ "$COMPILER" != "g++-8" ]]; then make clean && make -j2 ARCH=x86-64-avx512 build; fi + - make clean && make -j2 ARCH=x86-64-avx512 build + - make clean && make -j2 ARCH=x86-64-vnni512 build + # requires gcc 9 or higher + - if [[ "$COMPILER" != "g++-8" ]]; make clean && make -j2 ARCH=x86-64-vnni256 build; fi # # Check perft and reproducible search diff --git a/src/Makefile b/src/Makefile index 3e1b7c35..228ea851 100644 --- a/src/Makefile +++ b/src/Makefile @@ -75,7 +75,8 @@ endif # sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 # avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 -# vnni = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 +# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256 +# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # # Note that Makefile is space sensitive, so when adding new architectures @@ -102,7 +103,8 @@ ssse3 = no sse41 = no avx2 = no avx512 = no -vnni = no +vnni256 = no +vnni512 = no neon = no ARCH = x86-64-modern STRIP = strip @@ -192,7 +194,18 @@ ifeq ($(findstring -avx512,$(ARCH)),-avx512) avx512 = yes endif -ifeq ($(findstring -vnni,$(ARCH)),-vnni) +ifeq 
($(findstring -vnni256,$(ARCH)),-vnni256) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes + vnni256 = yes +endif + +ifeq ($(findstring -vnni512,$(ARCH)),-vnni512) popcnt = yes sse = yes sse2 = yes @@ -201,7 +214,7 @@ ifeq ($(findstring -vnni,$(ARCH)),-vnni) avx2 = yes pext = yes avx512 = yes - vnni = yes + vnni512 = yes endif ifeq ($(sse),yes) @@ -500,7 +513,14 @@ ifeq ($(avx512),yes) endif endif -ifeq ($(vnni),yes) +ifeq ($(vnni256),yes) + CXXFLAGS += -DUSE_VNNI + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256 + endif +endif + +ifeq ($(vnni512),yes) CXXFLAGS += -DUSE_VNNI ifeq ($(comp),$(filter $(comp),gcc clang mingw)) CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl @@ -623,7 +643,8 @@ help: @echo "" @echo "Supported archs:" @echo "" - @echo "x86-64-vnni > x86 64-bit with vnni support" + @echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide" + @echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide" @echo "x86-64-avx512 > x86 64-bit with avx512 support" @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support" @@ -767,7 +788,8 @@ config-sanity: @echo "sse41: '$(sse41)'" @echo "avx2: '$(avx2)'" @echo "avx512: '$(avx512)'" - @echo "vnni: '$(vnni)'" + @echo "vnni256: '$(vnni256)'" + @echo "vnni512: '$(vnni512)'" @echo "neon: '$(neon)'" @echo "" @echo "Flags:" @@ -794,7 +816,8 @@ config-sanity: @test "$(sse41)" = "yes" || test "$(sse41)" = "no" @test "$(avx2)" = "yes" || test "$(avx2)" = "no" @test "$(avx512)" = "yes" || test "$(avx512)" = "no" - @test "$(vnni)" = "yes" || test "$(vnni)" = "no" + @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" + @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no" @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ || test "$(comp)" = 
"armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 7ac5a1c0..94d0b5a9 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -85,8 +85,10 @@ namespace Eval::NNUE::Layers { #elif defined(USE_AVX2) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const __m256i kOnes = _mm256_set1_epi16(1); const auto input_vector = reinterpret_cast(input); + #if !defined(USE_VNNI) + const __m256i kOnes = _mm256_set1_epi16(1); + #endif #elif defined(USE_SSE2) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; @@ -145,9 +147,13 @@ namespace Eval::NNUE::Layers { __m256i sum = _mm256_setzero_si256(); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { + #if defined(USE_VNNI) + sum = _mm256_dpbusd_epi32(sum, _mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j])); + #else __m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j])); product = _mm256_madd_epi16(product, kOnes); sum = _mm256_add_epi32(sum, product); + #endif } __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); From f7b3f0e8426bbf7414d139ed9d1cfa7a98d7314d Mon Sep 17 00:00:00 2001 From: Sami Kiminki Date: Fri, 21 Aug 2020 12:12:39 +0300 Subject: [PATCH 15/52] Allow TT entries with key16==0 to be fetched Fix the issue where a TT entry with key16==0 would always be reported as a miss. Instead, we'll use depth8 to detect whether the TT entry is occupied. In order to do that, we'll change DEPTH_OFFSET to -7 (depth8==0) to distinguish between an unoccupied entry and the otherwise lowest possible depth, i.e., DEPTH_NONE (depth8==1). 
To prevent a performance regression, we'll reorder the TT entry fields by the access order of TranspositionTable::probe(). Memory in general works fastest when accessed in sequential order. We'll also match the store order in TTEntry::save() with the entry field order, and re-order the 'if-or' expressions in TTEntry::save() from the cheapest to the most expensive. Finally, as we now have a proper TT entry occupancy test, we'll fix a minor corner case with hashfull reporting. To reproduce: - Use a big hash - Either: a. Start 31 very quick searches (this wraps the generation around to 0); or b. Force generation of the first search to 0. - go depth infinite Before the fix, hashfull would incorrectly report a nearly full hash immediately after the search start, since TranspositionTable::hashfull() used to consider only the entry generation and not whether the entry was actually occupied. STC: LLR: 2.95 (-2.94,2.94) {-0.25,1.25} Total: 36848 W: 4091 L: 3898 D: 28859 Ptnml(0-2): 158, 2996, 11972, 3091, 207 https://tests.stockfishchess.org/tests/view/5f3f98d5dc02a01a0c2881f7 LTC: LLR: 2.95 (-2.94,2.94) {0.25,1.25} Total: 32280 W: 1828 L: 1653 D: 28799 Ptnml(0-2): 34, 1428, 13051, 1583, 44 https://tests.stockfishchess.org/tests/view/5f3fe77a87a5c3c63d8f5332 closes https://github.com/official-stockfish/Stockfish/pull/3048 Bench: 3760677 --- src/tt.cpp | 21 +++++++++++---------- src/tt.h | 12 ++++++------ src/types.h | 3 ++- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/tt.cpp b/src/tt.cpp index d494c27d..60a3a5f1 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -37,18 +37,19 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) if (m || (uint16_t)k != key16) move16 = (uint16_t)m; - // Overwrite less valuable entries - if ((uint16_t)k != key16 - || d - DEPTH_OFFSET > depth8 - 4 - || b == BOUND_EXACT) + // Overwrite less valuable entries (cheapest checks first) + if (b == BOUND_EXACT + || (uint16_t)k != key16 + || d - DEPTH_OFFSET >
depth8 - 4) { - assert(d >= DEPTH_OFFSET); + assert(d > DEPTH_OFFSET); + assert(d < 256 + DEPTH_OFFSET); key16 = (uint16_t)k; + depth8 = (uint8_t)(d - DEPTH_OFFSET); + genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b); value16 = (int16_t)v; eval16 = (int16_t)ev; - genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b); - depth8 = (uint8_t)(d - DEPTH_OFFSET); } } @@ -119,11 +120,11 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const { const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster for (int i = 0; i < ClusterSize; ++i) - if (!tte[i].key16 || tte[i].key16 == key16) + if (tte[i].key16 == key16 || !tte[i].depth8) { tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh - return found = (bool)tte[i].key16, &tte[i]; + return found = (bool)tte[i].depth8, &tte[i]; } // Find an entry to be replaced according to the replacement strategy @@ -149,7 +150,7 @@ int TranspositionTable::hashfull() const { int cnt = 0; for (int i = 0; i < 1000; ++i) for (int j = 0; j < ClusterSize; ++j) - cnt += (table[i].entry[j].genBound8 & 0xF8) == generation8; + cnt += table[i].entry[j].depth8 && (table[i].entry[j].genBound8 & 0xF8) == generation8; return cnt / ClusterSize; } diff --git a/src/tt.h b/src/tt.h index c177ca52..fdfd6769 100644 --- a/src/tt.h +++ b/src/tt.h @@ -25,13 +25,13 @@ /// TTEntry struct is the 10 bytes transposition table entry, defined as below: /// /// key 16 bit -/// move 16 bit -/// value 16 bit -/// eval value 16 bit +/// depth 8 bit /// generation 5 bit /// pv node 1 bit /// bound type 2 bit -/// depth 8 bit +/// move 16 bit +/// value 16 bit +/// eval value 16 bit struct TTEntry { @@ -47,11 +47,11 @@ private: friend class TranspositionTable; uint16_t key16; + uint8_t depth8; + uint8_t genBound8; uint16_t move16; int16_t value16; int16_t eval16; - uint8_t genBound8; - uint8_t depth8; }; diff --git a/src/types.h b/src/types.h index 73da41e2..1cd711b6 100644 --- 
a/src/types.h +++ b/src/types.h @@ -232,7 +232,8 @@ enum : int { DEPTH_QS_RECAPTURES = -5, DEPTH_NONE = -6, - DEPTH_OFFSET = DEPTH_NONE + + DEPTH_OFFSET = -7 // value used only for TT entry occupancy check }; enum Square : int { From 843a961a8c10d5949e04718b829e3b3d5adeedb4 Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Mon, 24 Aug 2020 08:04:16 +0300 Subject: [PATCH 16/52] Introduce countermove based pruning for qsearch This patch continues the work of the previous patch in introducing pruning heuristics in qsearch by analogy to main search, now with countermove based pruning. The idea is that if a move is late enough, is a quiet check (we do generate them in qsearch) and has bad enough countermove history - prune it. passed STC https://tests.stockfishchess.org/tests/view/5f41220287a5c3c63d8f53c5 LLR: 2.93 (-2.94,2.94) {-0.25,1.25} Total: 35944 W: 4127 L: 3929 D: 27888 Ptnml(0-2): 196, 2970, 11459, 3134, 213 passed LTC https://tests.stockfishchess.org/tests/view/5f41862f87a5c3c63d8f53e8 LLR: 2.95 (-2.94,2.94) {0.25,1.25} Total: 138448 W: 7655 L: 7252 D: 123541 Ptnml(0-2): 145, 6247, 56043, 6638, 151 closes https://github.com/official-stockfish/Stockfish/pull/3058 Bench: 3610676 --- src/search.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index 266e2db3..2ca64a01 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1570,6 +1570,12 @@ moves_loop: // When in check, search starts from here [pos.moved_piece(move)] [to_sq(move)]; + if ( !captureOrPromotion + && moveCount >= abs(depth) + 1 + && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold + && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold) + continue; + // Make and search the move pos.do_move(move, st, givesCheck); value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); From 530fccbf272ffe424ae53a464b91db148cc968ae Mon Sep 17 00:00:00 2001 From: mstembera Date: Mon, 24 Aug 2020 03:38:01 -0700 Subject: [PATCH 17/52] Allow
for VNNI256 compilation with g++-8 explicitly pass needed -mavx512f -mavx512bw flags closes https://github.com/official-stockfish/Stockfish/pull/3061 No functional change --- .travis.yml | 3 +-- src/Makefile | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index c1e6d6df..092c7f53 100644 --- a/.travis.yml +++ b/.travis.yml @@ -79,8 +79,7 @@ script: - make clean && make -j2 ARCH=x86-64-bmi2 build - make clean && make -j2 ARCH=x86-64-avx512 build - make clean && make -j2 ARCH=x86-64-vnni512 build - # requires gcc 9 or higher - - if [[ "$COMPILER" != "g++-8" ]]; make clean && make -j2 ARCH=x86-64-vnni256 build; fi + - make clean && make -j2 ARCH=x86-64-vnni256 build # # Check perft and reproducible search diff --git a/src/Makefile b/src/Makefile index 228ea851..2e85a144 100644 --- a/src/Makefile +++ b/src/Makefile @@ -516,7 +516,7 @@ endif ifeq ($(vnni256),yes) CXXFLAGS += -DUSE_VNNI ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256 + CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256 endif endif From b0b4ca17db49ed03057b5fa4ee4a12dab0e9c9e6 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Mon, 24 Aug 2020 21:32:04 +0200 Subject: [PATCH 18/52] Check ARCH=.... variable to prevent user errors or generating untested code, check explicitly that the ARCH variable is equivalent to a supported architecture as listed in `make help`. To nevertheless compile for an untested target the user can override the internal variable, passing the undocumented `SUPPORTED_ARCH=true` to make. closes https://github.com/official-stockfish/Stockfish/pull/3062 No functional change. --- src/Makefile | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index 2e85a144..703aa230 100644 --- a/src/Makefile +++ b/src/Makefile @@ -85,8 +85,15 @@ endif ### 2.1. 
General and architecture defaults -ifeq ($(ARCH),) - empty_arch = yes +# explicitly check for the list of supported architectures (as listed with make help), +# the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true` +ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ + x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ + armv7 armv7-neon armv8 apple-silicon general-64 general-32)) + SUPPORTED_ARCH=true +else + SUPPORTED_ARCH=false endif optimize = yes @@ -625,6 +632,7 @@ endif ### Section 4. Public Targets ### ========================================================================== + help: @echo "" @echo "To compile stockfish, type: " @@ -684,10 +692,12 @@ help: @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0" @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" -ifneq ($(empty_arch), yes) @echo "-------------------------------" +ifeq ($(SUPPORTED_ARCH), true) @echo "The selected architecture $(ARCH) will enable the following configuration: " @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity +else + @echo "Specify a supported architecture with the ARCH option for more details" endif @@ -802,6 +812,7 @@ config-sanity: @test "$(debug)" = "yes" || test "$(debug)" = "no" @test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "address" || test "$(sanitize)" = "no" @test "$(optimize)" = "yes" || test "$(optimize)" = "no" + @test "$(SUPPORTED_ARCH)" = "true" @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \ test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" From 9b4967071e2fb116673820127522bc43d01d2257 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Mon, 24 Aug 2020 02:29:38 +0200 Subject: [PATCH 
19/52] Remove EvalList This patch removes the EvalList structure from the Position object and generally simplifies the interface between do_move() and the NNUE code. The NNUE evaluation function first calculates the "accumulator". The accumulator consists of two halves: one for white's perspective, one for black's perspective. If the "friendly king" has moved or the accumulator for the parent position is not available, the accumulator for this half has to be calculated from scratch. To do this, the NNUE node needs to know the positions and types of all non-king pieces and the position of the friendly king. This information can easily be obtained from the Position object. If the "friendly king" has not moved, its half of the accumulator can be calculated by incrementally updating the accumulator for the previous position. For this, the NNUE code needs to know which pieces have been added to which squares and which pieces have been removed from which squares. In principle this information can be derived from the Position object and StateInfo struct (in the same way as undo_move() does this). However, it is probably a bit faster to prepare this information in do_move(), so I have kept the DirtyPiece struct. Since the DirtyPiece struct now stores the squares rather than "PieceSquare" indices, there are now at most three "dirty pieces" (previously two). A promotion move that captures a piece removes the capturing pawn and the captured piece from the board (to SQ_NONE) and moves the promoted piece to the promotion square (from SQ_NONE). 
An STC test has confirmed a small speedup: https://tests.stockfishchess.org/tests/view/5f43f06b5089a564a10d850a LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 87704 W: 9763 L: 9500 D: 68441 Ptnml(0-2): 426, 6950, 28845, 7197, 434 closes https://github.com/official-stockfish/Stockfish/pull/3068 No functional change --- src/nnue/evaluate_nnue.cpp | 43 ++++++------ src/nnue/features/feature_set.h | 3 +- src/nnue/features/half_kp.cpp | 58 ++++++---------- src/nnue/features/half_kp.h | 10 +-- src/nnue/nnue_common.h | 21 ++++++ src/position.cpp | 95 +++++++------------------- src/position.h | 23 ------- src/types.h | 116 +++----------------------------- 8 files changed, 96 insertions(+), 273 deletions(-) diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index dfbb1ac2..e6619089 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -29,30 +29,29 @@ #include "evaluate_nnue.h" -ExtPieceSquare kpp_board_index[PIECE_NB] = { - // convention: W - us, B - them - // viewed from other side, W and B are reversed - { PS_NONE, PS_NONE }, - { PS_W_PAWN, PS_B_PAWN }, - { PS_W_KNIGHT, PS_B_KNIGHT }, - { PS_W_BISHOP, PS_B_BISHOP }, - { PS_W_ROOK, PS_B_ROOK }, - { PS_W_QUEEN, PS_B_QUEEN }, - { PS_W_KING, PS_B_KING }, - { PS_NONE, PS_NONE }, - { PS_NONE, PS_NONE }, - { PS_B_PAWN, PS_W_PAWN }, - { PS_B_KNIGHT, PS_W_KNIGHT }, - { PS_B_BISHOP, PS_W_BISHOP }, - { PS_B_ROOK, PS_W_ROOK }, - { PS_B_QUEEN, PS_W_QUEEN }, - { PS_B_KING, PS_W_KING }, - { PS_NONE, PS_NONE } -}; - - namespace Eval::NNUE { + uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = { + // convention: W - us, B - them + // viewed from other side, W and B are reversed + { PS_NONE, PS_NONE }, + { PS_W_PAWN, PS_B_PAWN }, + { PS_W_KNIGHT, PS_B_KNIGHT }, + { PS_W_BISHOP, PS_B_BISHOP }, + { PS_W_ROOK, PS_B_ROOK }, + { PS_W_QUEEN, PS_B_QUEEN }, + { PS_W_KING, PS_B_KING }, + { PS_NONE, PS_NONE }, + { PS_NONE, PS_NONE }, + { PS_B_PAWN, PS_W_PAWN }, + { PS_B_KNIGHT, PS_W_KNIGHT }, + { PS_B_BISHOP, 
PS_W_BISHOP }, + { PS_B_ROOK, PS_W_ROOK }, + { PS_B_QUEEN, PS_W_QUEEN }, + { PS_B_KING, PS_W_KING }, + { PS_NONE, PS_NONE } + }; + // Input feature converter AlignedPtr feature_transformer; diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h index 79ca83ae..558a6b22 100644 --- a/src/nnue/features/feature_set.h +++ b/src/nnue/features/feature_set.h @@ -68,8 +68,7 @@ namespace Eval::NNUE::Features { reset[perspective] = false; switch (trigger) { case TriggerEvent::kFriendKingMoved: - reset[perspective] = - dp.pieceId[0] == PIECE_ID_KING + perspective; + reset[perspective] = dp.piece[0] == make_piece(perspective, KING); break; default: assert(false); diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp index 628add6e..88e384a3 100644 --- a/src/nnue/features/half_kp.cpp +++ b/src/nnue/features/half_kp.cpp @@ -23,25 +23,17 @@ namespace Eval::NNUE::Features { - // Find the index of the feature quantity from the king position and PieceSquare - template - inline IndexType HalfKP::MakeIndex(Square sq_k, PieceSquare p) { - return static_cast(PS_END) * static_cast(sq_k) + p; + // Orient a square according to perspective (rotates by 180 for black) + inline Square orient(Color perspective, Square s) { + return Square(int(s) ^ (bool(perspective) * 63)); } - // Get pieces information + // Find the index of the feature quantity from the king position and PieceSquare template - inline void HalfKP::GetPieces( - const Position& pos, Color perspective, - PieceSquare** pieces, Square* sq_target_k) { + inline IndexType HalfKP::MakeIndex( + Color perspective, Square s, Piece pc, Square ksq) { - *pieces = (perspective == BLACK) ? - pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - const PieceId target = (AssociatedKing == Side::kFriend) ? 
- static_cast(PIECE_ID_KING + perspective) : - static_cast(PIECE_ID_KING + ~perspective); - *sq_target_k = static_cast(((*pieces)[target] - PS_W_KING) % SQUARE_NB); + return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END * ksq); } // Get a list of indices for active features @@ -49,16 +41,11 @@ namespace Eval::NNUE::Features { void HalfKP::AppendActiveIndices( const Position& pos, Color perspective, IndexList* active) { - // Do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - PieceSquare* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); - for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) { - if (pieces[i] != PS_NONE) { - active->push_back(MakeIndex(sq_target_k, pieces[i])); - } + Square ksq = orient(perspective, pos.square(perspective)); + Bitboard bb = pos.pieces() & ~pos.pieces(KING); + while (bb) { + Square s = pop_lsb(&bb); + active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq)); } } @@ -68,22 +55,15 @@ namespace Eval::NNUE::Features { const Position& pos, Color perspective, IndexList* removed, IndexList* added) { - PieceSquare* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); + Square ksq = orient(perspective, pos.square(perspective)); const auto& dp = pos.state()->dirtyPiece; for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.pieceId[i] >= PIECE_ID_KING) continue; - const auto old_p = static_cast( - dp.old_piece[i].from[perspective]); - if (old_p != PS_NONE) { - removed->push_back(MakeIndex(sq_target_k, old_p)); - } - const auto new_p = static_cast( - dp.new_piece[i].from[perspective]); - if (new_p != PS_NONE) { - added->push_back(MakeIndex(sq_target_k, new_p)); - } + Piece pc = dp.piece[i]; + if (type_of(pc) == KING) continue; + if (dp.from[i] != SQ_NONE) + removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq)); + if (dp.to[i] != SQ_NONE) + 
added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq)); } } diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_kp.h index 99842eea..ee6a8df3 100644 --- a/src/nnue/features/half_kp.h +++ b/src/nnue/features/half_kp.h @@ -41,7 +41,7 @@ namespace Eval::NNUE::Features { static constexpr IndexType kDimensions = static_cast(SQUARE_NB) * static_cast(PS_END); // Maximum number of simultaneously active features - static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING; + static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count // Trigger for full calculation instead of difference calculation static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved; @@ -53,13 +53,9 @@ namespace Eval::NNUE::Features { static void AppendChangedIndices(const Position& pos, Color perspective, IndexList* removed, IndexList* added); - // Index of a feature for a given king position and another piece on some square - static IndexType MakeIndex(Square sq_k, PieceSquare p); - private: - // Get pieces information - static void GetPieces(const Position& pos, Color perspective, - PieceSquare** pieces, Square* sq_target_k); + // Index of a feature for a given king position and another piece on some square + static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k); }; } // namespace Eval::NNUE::Features diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index a9d8e5af..7bc905dc 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -94,6 +94,27 @@ namespace Eval::NNUE { constexpr std::size_t kMaxSimdWidth = 32; + // unique number for each piece type on each square + enum { + PS_NONE = 0, + PS_W_PAWN = 1, + PS_B_PAWN = 1 * SQUARE_NB + 1, + PS_W_KNIGHT = 2 * SQUARE_NB + 1, + PS_B_KNIGHT = 3 * SQUARE_NB + 1, + PS_W_BISHOP = 4 * SQUARE_NB + 1, + PS_B_BISHOP = 5 * SQUARE_NB + 1, + PS_W_ROOK = 6 * SQUARE_NB + 1, + PS_B_ROOK = 7 * SQUARE_NB + 1, + PS_W_QUEEN = 8 * SQUARE_NB + 1, + PS_B_QUEEN = 9 * 
SQUARE_NB + 1, + PS_W_KING = 10 * SQUARE_NB + 1, + PS_END = PS_W_KING, // pieces without kings (pawns included) + PS_B_KING = 11 * SQUARE_NB + 1, + PS_END2 = 12 * SQUARE_NB + 1 + }; + + extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; + // Type of input feature after conversion using TransformedFeatureType = std::uint8_t; using IndexType = std::uint32_t; diff --git a/src/position.cpp b/src/position.cpp index 02898547..fe89b753 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -198,9 +198,6 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE); st = si; - // Each piece on board gets a unique ID used to track the piece later - PieceId piece_id, next_piece_id = PIECE_ID_ZERO; - ss >> std::noskipws; // 1. Piece placement @@ -212,21 +209,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th else if (token == '/') sq += 2 * SOUTH; - else if ((idx = PieceToChar.find(token)) != string::npos) - { - auto pc = Piece(idx); - put_piece(pc, sq); - - if (Eval::useNNUE) - { - // Kings get a fixed ID, other pieces get ID in order of placement - piece_id = - (idx == W_KING) ? PIECE_ID_WKING : - (idx == B_KING) ? 
PIECE_ID_BKING : - next_piece_id++; - evalList.put_piece(piece_id, sq, pc); - } - + else if ((idx = PieceToChar.find(token)) != string::npos) { + put_piece(Piece(idx), sq); ++sq; } } @@ -721,8 +705,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Used by NNUE st->accumulator.computed_accumulation = false; st->accumulator.computed_score = false; - PieceId dp0 = PIECE_ID_NONE; - PieceId dp1 = PIECE_ID_NONE; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -775,12 +757,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { if (Eval::useNNUE) { - dp.dirty_num = 2; // 2 pieces moved - dp1 = piece_id_on(capsq); - dp.pieceId[1] = dp1; - dp.old_piece[1] = evalList.piece_with_id(dp1); - evalList.put_piece(dp1, capsq, NO_PIECE); - dp.new_piece[1] = evalList.piece_with_id(dp1); + dp.dirty_num = 2; // 1 piece moved, 1 piece captured + dp.piece[1] = captured; + dp.from[1] = capsq; + dp.to[1] = SQ_NONE; } // Update board and piece lists @@ -821,11 +801,9 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { { if (Eval::useNNUE) { - dp0 = piece_id_on(from); - dp.pieceId[0] = dp0; - dp.old_piece[0] = evalList.piece_with_id(dp0); - evalList.put_piece(dp0, to, pc); - dp.new_piece[0] = evalList.piece_with_id(dp0); + dp.piece[0] = pc; + dp.from[0] = from; + dp.to[0] = to; } move_piece(from, to); @@ -854,9 +832,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { if (Eval::useNNUE) { - dp0 = piece_id_on(to); - evalList.put_piece(dp0, to, promotion); - dp.new_piece[0] = evalList.piece_with_id(dp0); + // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE + dp.to[0] = SQ_NONE; + dp.piece[dp.dirty_num] = promotion; + dp.from[dp.dirty_num] = SQ_NONE; + dp.to[dp.dirty_num] = to; + dp.dirty_num++; } // Update hash keys @@ -950,12 +931,6 @@ void Position::undo_move(Move m) { { move_piece(to, from); // Put the piece back at the source square - if (Eval::useNNUE) - { - PieceId dp0 = st->dirtyPiece.pieceId[0]; 
- evalList.put_piece(dp0, from, pc); - } - if (st->capturedPiece) { Square capsq = to; @@ -972,14 +947,6 @@ void Position::undo_move(Move m) { } put_piece(st->capturedPiece, capsq); // Restore the captured piece - - if (Eval::useNNUE) - { - PieceId dp1 = st->dirtyPiece.pieceId[1]; - assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE); - assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE); - evalList.put_piece(dp1, capsq, st->capturedPiece); - } } } @@ -1001,32 +968,16 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); - if (Eval::useNNUE) + if (Do && Eval::useNNUE) { - PieceId dp0, dp1; auto& dp = st->dirtyPiece; - dp.dirty_num = 2; // 2 pieces moved - - if (Do) - { - dp0 = piece_id_on(from); - dp1 = piece_id_on(rfrom); - dp.pieceId[0] = dp0; - dp.old_piece[0] = evalList.piece_with_id(dp0); - evalList.put_piece(dp0, to, make_piece(us, KING)); - dp.new_piece[0] = evalList.piece_with_id(dp0); - dp.pieceId[1] = dp1; - dp.old_piece[1] = evalList.piece_with_id(dp1); - evalList.put_piece(dp1, rto, make_piece(us, ROOK)); - dp.new_piece[1] = evalList.piece_with_id(dp1); - } - else - { - dp0 = piece_id_on(to); - dp1 = piece_id_on(rto); - evalList.put_piece(dp0, from, make_piece(us, KING)); - evalList.put_piece(dp1, rfrom, make_piece(us, ROOK)); - } + dp.piece[0] = make_piece(us, KING); + dp.from[0] = from; + dp.to[0] = to; + dp.piece[1] = make_piece(us, ROOK); + dp.from[1] = rfrom; + dp.to[1] = rto; + dp.dirty_num = 2; } // Remove both pieces first since squares could overlap in Chess960 diff --git a/src/position.h b/src/position.h index 5ce17277..d6f5c9fd 100644 --- a/src/position.h +++ b/src/position.h @@ -171,7 +171,6 @@ public: // Used by NNUE StateInfo* state() const; - const EvalList* eval_list() const; private: // Initialization helpers (used while setting up a position) @@ -186,9 +185,6 @@ private: template void 
do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); - // ID of a piece on a given square - PieceId piece_id_on(Square sq) const; - // Data members Piece board[SQUARE_NB]; Bitboard byTypeBB[PIECE_TYPE_NB]; @@ -205,9 +201,6 @@ private: Thread* thisThread; StateInfo* st; bool chess960; - - // List of pieces used in NNUE evaluation function - EvalList evalList; }; namespace PSQT { @@ -451,20 +444,4 @@ inline StateInfo* Position::state() const { return st; } -inline const EvalList* Position::eval_list() const { - - return &evalList; -} - -inline PieceId Position::piece_id_on(Square sq) const -{ - - assert(piece_on(sq) != NO_PIECE); - - PieceId pid = evalList.piece_id_list[sq]; - assert(is_ok(pid)); - - return pid; -} - #endif // #ifndef POSITION_H_INCLUDED diff --git a/src/types.h b/src/types.h index 1cd711b6..5873c698 100644 --- a/src/types.h +++ b/src/types.h @@ -201,22 +201,6 @@ enum Piece { PIECE_NB = 16 }; -// An ID used to track the pieces. Max. 32 pieces on board. 
-enum PieceId { - PIECE_ID_ZERO = 0, - PIECE_ID_KING = 30, - PIECE_ID_WKING = 30, - PIECE_ID_BKING = 31, - PIECE_ID_NONE = 32 -}; - -inline PieceId operator++(PieceId& d, int) { - - PieceId x = d; - d = PieceId(int(d) + 1); - return x; -} - constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO, VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO }, @@ -271,93 +255,20 @@ enum Rank : int { RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB }; -// unique number for each piece type on each square -enum PieceSquare : uint32_t { - PS_NONE = 0, - PS_W_PAWN = 1, - PS_B_PAWN = 1 * SQUARE_NB + 1, - PS_W_KNIGHT = 2 * SQUARE_NB + 1, - PS_B_KNIGHT = 3 * SQUARE_NB + 1, - PS_W_BISHOP = 4 * SQUARE_NB + 1, - PS_B_BISHOP = 5 * SQUARE_NB + 1, - PS_W_ROOK = 6 * SQUARE_NB + 1, - PS_B_ROOK = 7 * SQUARE_NB + 1, - PS_W_QUEEN = 8 * SQUARE_NB + 1, - PS_B_QUEEN = 9 * SQUARE_NB + 1, - PS_W_KING = 10 * SQUARE_NB + 1, - PS_END = PS_W_KING, // pieces without kings (pawns included) - PS_B_KING = 11 * SQUARE_NB + 1, - PS_END2 = 12 * SQUARE_NB + 1 -}; - -struct ExtPieceSquare { - PieceSquare from[COLOR_NB]; -}; - -// Array for finding the PieceSquare corresponding to the piece on the board -extern ExtPieceSquare kpp_board_index[PIECE_NB]; - -constexpr bool is_ok(PieceId pid); -constexpr Square rotate180(Square sq); - -// Structure holding which tracked piece (PieceId) is where (PieceSquare) -class EvalList { - -public: - // Max. 
number of pieces without kings is 30 but must be a multiple of 4 in AVX2 - static const int MAX_LENGTH = 32; - - // Array that holds the piece id for the pieces on the board - PieceId piece_id_list[SQUARE_NB]; - - // List of pieces, separate from White and Black POV - PieceSquare* piece_list_fw() const { return const_cast(pieceListFw); } - PieceSquare* piece_list_fb() const { return const_cast(pieceListFb); } - - // Place the piece pc with piece_id on the square sq on the board - void put_piece(PieceId piece_id, Square sq, Piece pc) - { - assert(is_ok(piece_id)); - if (pc != NO_PIECE) - { - pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq); - pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq)); - piece_id_list[sq] = piece_id; - } - else - { - pieceListFw[piece_id] = PS_NONE; - pieceListFb[piece_id] = PS_NONE; - piece_id_list[sq] = piece_id; - } - } - - // Convert the specified piece_id piece to ExtPieceSquare type and return it - ExtPieceSquare piece_with_id(PieceId piece_id) const - { - ExtPieceSquare eps; - eps.from[WHITE] = pieceListFw[piece_id]; - eps.from[BLACK] = pieceListFb[piece_id]; - return eps; - } - -private: - PieceSquare pieceListFw[MAX_LENGTH]; - PieceSquare pieceListFb[MAX_LENGTH]; -}; - -// For differential evaluation of pieces that changed since last turn +// Keep track of what a move changes on the board (used by NNUE) struct DirtyPiece { // Number of changed pieces int dirty_num; - // The ids of changed pieces, max. 2 pieces can change in one move - PieceId pieceId[2]; + // Max 3 pieces can change in one move. A promotion with capture moves + // both the pawn and the captured piece to SQ_NONE and the piece promoted + // to from SQ_NONE to the capture square. 
+ Piece piece[3]; - // What changed from the piece with that piece number - ExtPieceSquare old_piece[2]; - ExtPieceSquare new_piece[2]; + // From and to squares, which may be SQ_NONE + Square from[3]; + Square to[3]; }; /// Score enum stores a middlegame and an endgame value in a single integer (enum). @@ -407,8 +318,6 @@ ENABLE_FULL_OPERATORS_ON(Value) ENABLE_FULL_OPERATORS_ON(Direction) ENABLE_INCR_OPERATORS_ON(Piece) -ENABLE_INCR_OPERATORS_ON(PieceSquare) -ENABLE_INCR_OPERATORS_ON(PieceId) ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(File) @@ -497,10 +406,6 @@ inline Color color_of(Piece pc) { return Color(pc >> 3); } -constexpr bool is_ok(PieceId pid) { - return pid < PIECE_ID_NONE; -} - constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } @@ -537,11 +442,6 @@ constexpr Square to_sq(Move m) { return Square(m & 0x3F); } -// Return relative square when turning the board 180 degrees -constexpr Square rotate180(Square sq) { - return (Square)(sq ^ 0x3F); -} - constexpr int from_to(Move m) { return m & 0xFFF; } From 95b8f3f8005598fc3ad07a7a8f6440d828cabc29 Mon Sep 17 00:00:00 2001 From: VoyagerOne Date: Sun, 23 Aug 2020 12:04:50 -0400 Subject: [PATCH 20/52] Remove Reduce Depth Remove Reduce Depth at PV nodes. 
STC: LLR: 2.94 (-2.94,2.94) {-1.25,0.25} Total: 56760 W: 6299 L: 6236 D: 44225 Ptnml(0-2): 286, 4843, 18076, 4872, 303 https://tests.stockfishchess.org/tests/view/5f41356087a5c3c63d8f53c9 LTC: LLR: 2.95 (-2.94,2.94) {-0.75,0.25} Total: 17496 W: 954 L: 865 D: 15677 Ptnml(0-2): 13, 768, 7098, 855, 14 https://tests.stockfishchess.org/tests/view/5f41bb7687a5c3c63d8f53f9 closes https://github.com/official-stockfish/Stockfish/pull/3055 Bench: 3555051 --- src/search.cpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 2ca64a01..d6b611a3 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -940,12 +940,6 @@ namespace { } } - // Step 11. If the position is not in TT, decrease depth by 2 - if ( PvNode - && depth >= 6 - && !ttMove) - depth -= 2; - moves_loop: // When in check, search starts from here const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, @@ -969,7 +963,7 @@ moves_loop: // When in check, search starts from here // Mark this node as being searched ThreadHolding th(thisThread, posKey, ss->ply); - // Step 12. Loop through all pseudo-legal moves until no moves remain + // Step 11. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) { @@ -1007,7 +1001,7 @@ moves_loop: // When in check, search starts from here // Calculate new depth for this move newDepth = depth - 1; - // Step 13. Pruning at shallow depth (~200 Elo) + // Step 12. Pruning at shallow depth (~200 Elo) if ( !rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) @@ -1065,7 +1059,7 @@ moves_loop: // When in check, search starts from here } } - // Step 14. Extensions (~75 Elo) + // Step 13. Extensions (~75 Elo) // Singular extension search (~70 Elo). 
If all moves but one fail low on a // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), @@ -1148,10 +1142,10 @@ moves_loop: // When in check, search starts from here [movedPiece] [to_sq(move)]; - // Step 15. Make the move + // Step 14. Make the move pos.do_move(move, st, givesCheck); - // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be + // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // re-searched at full depth. if ( depth >= 3 && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) @@ -1254,7 +1248,7 @@ moves_loop: // When in check, search starts from here didLMR = false; } - // Step 17. Full depth search when LMR is skipped or fails high + // Step 16. Full depth search when LMR is skipped or fails high if (doFullDepthSearch) { value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); @@ -1282,12 +1276,12 @@ moves_loop: // When in check, search starts from here value = -search(pos, ss+1, -beta, -alpha, newDepth, false); } - // Step 18. Undo move + // Step 17. Undo move pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - // Step 19. Check for a new best move + // Step 18. Check for a new best move // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. @@ -1364,7 +1358,7 @@ moves_loop: // When in check, search starts from here return VALUE_DRAW; */ - // Step 20. Check for mate and stalemate + // Step 19. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then // return a fail low score. 
From 242a7d9fead561488ca176a4687deef8859918f2 Mon Sep 17 00:00:00 2001 From: VoyagerOne Date: Tue, 25 Aug 2020 09:10:47 -0400 Subject: [PATCH 21/52] Simplify MCP in QS Simplify moveCount pruning in QS by removing depth dependency. STC LLR: 2.94 (-2.94,2.94) {-1.25,0.25} Total: 42960 W: 4741 L: 4661 D: 33558 Ptnml(0-2): 218, 3574, 13804, 3678, 206 https://tests.stockfishchess.org/tests/view/5f42e3f75089a564a10d8493 LTC LLR: 2.94 (-2.94,2.94) {-0.75,0.25} Total: 66672 W: 3563 L: 3508 D: 59601 Ptnml(0-2): 71, 3064, 26996, 3149, 56 https://tests.stockfishchess.org/tests/view/5f4353285089a564a10d84d0 closes https://github.com/official-stockfish/Stockfish/pull/3067 Bench: 4074430 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index d6b611a3..cae8a684 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1526,7 +1526,7 @@ moves_loop: // When in check, search starts from here assert(type_of(move) != ENPASSANT); // Due to !pos.advanced_pawn_push // moveCount pruning - if (moveCount > abs(depth) + 2) + if (moveCount > 2) continue; futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; From 406979ea12ee7828e079871b0f9f3dc8f127a741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Sun, 23 Aug 2020 13:43:38 +0200 Subject: [PATCH 22/52] Embed default net, and simplify using non-default nets covers the most important cases from the user perspective: It embeds the default net in the binary, so a download of that binary will result in a working engine with the default net. The engine will be functional in the default mode without any additional user action. It allows non-default nets to be used, which will be looked for in up to three directories (working directory, location of the binary, and optionally a specific default directory). This mechanism is also kept for those developers that use MSVC, the one compiler that doesn't have an easy mechanism for embedding data. 
It is possible to disable embedding, and instead specify a specific directory, e.g. linux distros might want to use CXXFLAGS="-DNNUE_EMBEDDING_OFF -DDEFAULT_NNUE_DIRECTORY=/usr/share/games/stockfish/" make -j ARCH=x86-64 profile-build passed STC non-regression: https://tests.stockfishchess.org/tests/view/5f4a581c150f0aef5f8ae03a LLR: 2.95 (-2.94,2.94) {-1.25,-0.25} Total: 66928 W: 7202 L: 7147 D: 52579 Ptnml(0-2): 291, 5309, 22211, 5360, 293 closes https://github.com/official-stockfish/Stockfish/pull/3070 fixes https://github.com/official-stockfish/Stockfish/issues/3030 No functional change. --- AUTHORS | 3 +- README.md | 18 +- appveyor.yml | 2 +- src/Makefile | 10 +- src/evaluate.cpp | 103 +++++++++-- src/evaluate.h | 7 +- src/incbin/UNLICENCE | 26 +++ src/incbin/incbin.h | 368 +++++++++++++++++++++++++++++++++++++ src/main.cpp | 1 + src/misc.cpp | 59 ++++++ src/misc.h | 9 +- src/nnue/evaluate_nnue.cpp | 14 +- src/ucioption.cpp | 5 +- 13 files changed, 582 insertions(+), 43 deletions(-) create mode 100644 src/incbin/UNLICENCE create mode 100755 src/incbin/incbin.h diff --git a/AUTHORS b/AUTHORS index c96f870a..c00ab657 100644 --- a/AUTHORS +++ b/AUTHORS @@ -36,10 +36,11 @@ Bryan Cross (crossbr) candirufish Chess13234 Chris Cain (ceebo) +Dale Weiler (graphitemaster) Dan Schmidt (dfannius) Daniel Axtens (daxtens) Daniel Dugovic (ddugovic) -Dariusz Orzechowski +Dariusz Orzechowski (dorzechowski) David Zar Daylen Yang (daylen) DiscanX diff --git a/README.md b/README.md index 2cc88bf4..96a495ae 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ about how to use Stockfish with it. The Stockfish engine features two evaluation functions for chess, the classical evaluation based on handcrafted terms, and the NNUE evaluation based on efficiently -updateable neural networks. The classical evaluation runs efficiently on most 64bit -CPU architectures, while the NNUE evaluation benefits strongly from the vector -intrinsics available on modern CPUs (avx2 or similar). 
+updateable neural networks. The classical evaluation runs efficiently on almost all +CPU architectures, while the NNUE evaluation benefits from the vector +intrinsics available on most CPUs (sse2, avx2, neon, or similar). ## Files @@ -29,10 +29,11 @@ This distribution of Stockfish consists of the following files: that can be used to compile Stockfish on Unix-like systems. * a file with the .nnue extension, storing the neural network for the NNUE - evaluation. + evaluation. Binary distributions will have this file embedded. Note: to use the NNUE evaluation, the additional data file with neural network parameters -needs to be downloaded. The filename for the default net can be found as the default +needs to be available. Normally, this file is already embedded in the binary or it can be downloaded. +The filename for the default (recommended) net can be found as the default value of the `EvalFile` UCI option, with the format `nn-[SHA256 first 12 digits].nnue` (for instance, `nn-c157e0a5755b.nnue`). This file can be downloaded from ``` @@ -61,11 +62,14 @@ Currently, Stockfish has the following UCI options: * #### Use NNUE Toggle between the NNUE and classical evaluation functions. If set to "true", - the network parameters must be available to load from file (see also EvalFile). + the network parameters must be available to load from file (see also EvalFile), + if they are not embedded in the binary. * #### EvalFile The name of the file of the NNUE evaluation parameters. Depending on the GUI the - filename should include the full path to the folder/directory that contains the file. + filename might have to include the full path to the folder/directory that contains the file. + Other locations, such as the directory that contains the binary and the working directory, + are also searched. * #### UCI_AnalyseMode An option handled by your GUI. 
diff --git a/appveyor.yml b/appveyor.yml index a3732a23..ab608409 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -63,7 +63,7 @@ build_script: - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal - ps: | # Download default NNUE net from fishtest - $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" + $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" $dummy = $nnuenet -match "(?<nnuenet>nn-[a-z0-9]{12}.nnue)" $nnuenet = $Matches.nnuenet Write-Host "Default net:" $nnuenet diff --git a/src/Makefile b/src/Makefile index 703aa230..5f363f02 100644 --- a/src/Makefile +++ b/src/Makefile @@ -614,10 +614,12 @@ ifeq ($(debug), no) # So, only enable it for a cross from Linux by default. else ifeq ($(comp),mingw) ifeq ($(KERNEL),Linux) + ifneq ($(arch),i386) CXXFLAGS += -flto LDFLAGS += $(CXXFLAGS) -flto=jobserver endif endif + endif endif endif @@ -705,7 +707,7 @@ endif config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \ clang-profile-use clang-profile-make -build: config-sanity +build: config-sanity net $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all profile-build: net config-sanity objclean profileclean @@ -731,12 +733,13 @@ install: -cp $(EXE) $(BINDIR) -strip $(BINDIR)/$(EXE) -#clean all +# clean all clean: objclean profileclean @rm -f .depend *~ core +# evaluation network (nnue) net: - $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) + $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) @echo "Default net: $(nnuenet)" $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget
2>/dev/null; then echo "wget -qO-"; fi)) @@ -758,7 +761,6 @@ net: echo "shasum / sha256sum not found, skipping net validation"; \ fi - # clean binaries and objects objclean: @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o diff --git a/src/evaluate.cpp b/src/evaluate.cpp index ce92db9a..67154751 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -20,51 +20,126 @@ #include #include #include // For std::memset +#include #include #include #include +#include +#include #include "bitboard.h" #include "evaluate.h" #include "material.h" +#include "misc.h" #include "pawns.h" #include "thread.h" #include "uci.h" +#include "incbin/incbin.h" + + +// Macro to embed the default NNUE file data in the engine binary (using incbin.h, by Dale Weiler). +// This macro invocation will declare the following three variables +// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data +// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end +// const unsigned int gEmbeddedNNUESize; // the size of the embedded file +// Note that this does not work in Microsoft Visual Studio. +#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) + INCBIN(EmbeddedNNUE, EvalFileDefaultName); +#else + const unsigned char gEmbeddedNNUEData[1] = {0x0}; + const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1]; + const unsigned int gEmbeddedNNUESize = 1; +#endif + + +using namespace std; +using namespace Eval::NNUE; namespace Eval { bool useNNUE; - std::string eval_file_loaded="None"; + string eval_file_loaded = "None"; + + /// init_NNUE() tries to load a nnue network at startup time, or when the engine + /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" + /// The name of the nnue network is always retrieved from the EvalFile option. + /// We search the given network in three locations: internally (the default + /// network may be embedded in the binary), in the active working directory and + /// in the engine directory.
Distro packagers may define the DEFAULT_NNUE_DIRECTORY + /// variable to have the engine search in a special directory in their distro. void init_NNUE() { useNNUE = Options["Use NNUE"]; - std::string eval_file = std::string(Options["EvalFile"]); - if (useNNUE && eval_file_loaded != eval_file) - if (Eval::NNUE::load_eval_file(eval_file)) - eval_file_loaded = eval_file; + if (!useNNUE) + return; + + string eval_file = string(Options["EvalFile"]); + + #if defined(DEFAULT_NNUE_DIRECTORY) + #define stringify2(x) #x + #define stringify(x) stringify2(x) + vector dirs = { "" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; + #else + vector dirs = { "" , "" , CommandLine::binaryDirectory }; + #endif + + for (string directory : dirs) + if (eval_file_loaded != eval_file) + { + if (directory != "") + { + ifstream stream(directory + eval_file, ios::binary); + if (load_eval(eval_file, stream)) + eval_file_loaded = eval_file; + } + + if (directory == "" && eval_file == EvalFileDefaultName) + { + // C++ way to prepare a buffer for a memory stream + class MemoryBuffer : public basic_streambuf { + public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); } + }; + + MemoryBuffer buffer(const_cast(reinterpret_cast(gEmbeddedNNUEData)), + size_t(gEmbeddedNNUESize)); + + istream stream(&buffer); + if (load_eval(eval_file, stream)) + eval_file_loaded = eval_file; + } + } } + /// verify_NNUE() verifies that the last net used was loaded successfully void verify_NNUE() { - std::string eval_file = std::string(Options["EvalFile"]); + string eval_file = string(Options["EvalFile"]); + if (useNNUE && eval_file_loaded != eval_file) { UCI::OptionsMap defaults; UCI::init(defaults); - sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." 
<< sync_endl; - sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl; - sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl; - sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl; - sync_cout << "info string ERROR: The engine will be terminated now." << sync_endl; - std::exit(EXIT_FAILURE); + string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; + string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; + string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; + string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]); + string msg5 = "The engine will be terminated now."; + + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg4 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; + + exit(EXIT_FAILURE); } if (useNNUE) - sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl; + sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl; else - sync_cout << "info string classical evaluation enabled." 
<< sync_endl; + sync_cout << "info string classical evaluation enabled" << sync_endl; } } diff --git a/src/evaluate.h b/src/evaluate.h index e808068d..d701f5a7 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -35,12 +35,17 @@ namespace Eval { void init_NNUE(); void verify_NNUE(); + // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue + // for the build process (profile-build and fishtest) to work. Do not change the + // name of the macro, as it is used in the Makefile. + #define EvalFileDefaultName "nn-82215d0fd0df.nnue" + namespace NNUE { Value evaluate(const Position& pos); Value compute_eval(const Position& pos); void update_eval(const Position& pos); - bool load_eval_file(const std::string& evalFile); + bool load_eval(std::string streamName, std::istream& stream); } // namespace NNUE diff --git a/src/incbin/UNLICENCE b/src/incbin/UNLICENCE new file mode 100644 index 00000000..32484ab5 --- /dev/null +++ b/src/incbin/UNLICENCE @@ -0,0 +1,26 @@ +The file "incbin.h" is free and unencumbered software released into +the public domain by Dale Weiler, see: + + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h new file mode 100755 index 00000000..c19684d7 --- /dev/null +++ b/src/incbin/incbin.h @@ -0,0 +1,368 @@ +/** + * @file incbin.h + * @author Dale Weiler + * @brief Utility for including binary files + * + * Facilities for including binary files into the current translation unit and + * making use from them externally in other translation units. + */ +#ifndef INCBIN_HDR +#define INCBIN_HDR +#include +#if defined(__AVX512BW__) || \ + defined(__AVX512CD__) || \ + defined(__AVX512DQ__) || \ + defined(__AVX512ER__) || \ + defined(__AVX512PF__) || \ + defined(__AVX512VL__) || \ + defined(__AVX512F__) +# define INCBIN_ALIGNMENT_INDEX 6 +#elif defined(__AVX__) || \ + defined(__AVX2__) +# define INCBIN_ALIGNMENT_INDEX 5 +#elif defined(__SSE__) || \ + defined(__SSE2__) || \ + defined(__SSE3__) || \ + defined(__SSSE3__) || \ + defined(__SSE4_1__) || \ + defined(__SSE4_2__) || \ + defined(__neon__) +# define INCBIN_ALIGNMENT_INDEX 4 +#elif ULONG_MAX != 0xffffffffu +# define INCBIN_ALIGNMENT_INDEX 3 +# else +# define INCBIN_ALIGNMENT_INDEX 2 +#endif + +/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ +#define INCBIN_ALIGN_SHIFT_0 1 +#define INCBIN_ALIGN_SHIFT_1 2 +#define INCBIN_ALIGN_SHIFT_2 4 +#define INCBIN_ALIGN_SHIFT_3 8 +#define INCBIN_ALIGN_SHIFT_4 16 +#define INCBIN_ALIGN_SHIFT_5 32 +#define INCBIN_ALIGN_SHIFT_6 64 + +/* Actual alignment value */ +#define INCBIN_ALIGNMENT \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ + INCBIN_ALIGNMENT_INDEX) + +/* Stringize */ +#define INCBIN_STR(X) \ + #X +#define INCBIN_STRINGIZE(X) \ + INCBIN_STR(X) +/* Concatenate */ +#define INCBIN_CAT(X, Y) \ + X ## Y 
+#define INCBIN_CONCATENATE(X, Y) \ + INCBIN_CAT(X, Y) +/* Deferred macro expansion */ +#define INCBIN_EVAL(X) \ + X +#define INCBIN_INVOKE(N, ...) \ + INCBIN_EVAL(N(__VA_ARGS__)) + +/* Green Hills uses a different directive for including binary data */ +#if defined(__ghs__) +# if (__ghs_asm == 2) +# define INCBIN_MACRO ".file" +/* Or consider the ".myrawdata" entry in the ld file */ +# else +# define INCBIN_MACRO "\tINCBIN" +# endif +#else +# define INCBIN_MACRO ".incbin" +#endif + +#ifndef _MSC_VER +# define INCBIN_ALIGN \ + __attribute__((aligned(INCBIN_ALIGNMENT))) +#else +# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) +#endif + +#if defined(__arm__) || /* GNU C and RealView */ \ + defined(__arm) || /* Diab */ \ + defined(_ARM) /* ImageCraft */ +# define INCBIN_ARM +#endif + +#ifdef __GNUC__ +/* Utilize .balign where supported */ +# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".balign 1\n" +#elif defined(INCBIN_ARM) +/* + * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is + * the shift count. This is the value passed to `.align' + */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" +# define INCBIN_ALIGN_BYTE ".align 0\n" +#else +/* We assume other inline assembler's treat `.align' as `.balign' */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".align 1\n" +#endif + +/* INCBIN_CONST is used by incbin.c generated files */ +#if defined(__cplusplus) +# define INCBIN_EXTERNAL extern "C" +# define INCBIN_CONST extern const +#else +# define INCBIN_EXTERNAL extern +# define INCBIN_CONST const +#endif + +/** + * @brief Optionally override the linker section into which data is emitted. 
+ * + * @warning If you use this facility, you'll have to deal with platform-specific linker output + * section naming on your own + * + * Overriding the default linker output section, e.g for esp8266/Arduino: + * @code + * #define INCBIN_OUTPUT_SECTION ".irom.text" + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * // Data is emitted into program memory that never gets copied to RAM + * @endcode + */ +#if !defined(INCBIN_OUTPUT_SECTION) +# if defined(__APPLE__) +# define INCBIN_OUTPUT_SECTION ".const_data" +# else +# define INCBIN_OUTPUT_SECTION ".rodata" +# endif +#endif + +#if defined(__APPLE__) +/* The directives are different for Apple branded compilers */ +# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# define INCBIN_INT ".long " +# define INCBIN_MANGLE "_" +# define INCBIN_BYTE ".byte " +# define INCBIN_TYPE(...) +#else +# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# if defined(__ghs__) +# define INCBIN_INT ".word " +# else +# define INCBIN_INT ".int " +# endif +# if defined(__USER_LABEL_PREFIX__) +# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) +# else +# define INCBIN_MANGLE "" +# endif +# if defined(INCBIN_ARM) +/* On arm assemblers, `@' is used as a line comment token */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" +# elif defined(__MINGW32__) || defined(__MINGW64__) +/* Mingw doesn't support this directive either */ +# define INCBIN_TYPE(NAME) +# else +/* It's safe to use `@' on other architectures */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" +# endif +# define INCBIN_BYTE ".byte " +#endif + +/* List of style types used for symbol names */ +#define INCBIN_STYLE_CAMEL 0 +#define INCBIN_STYLE_SNAKE 1 + +/** + * @brief Specify the prefix to use 
for symbol names. + * + * By default this is `g', producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char gFooData[]; + * // const unsigned char *const gFooEnd; + * // const unsigned int gFooSize; + * @endcode + * + * If however you specify a prefix before including: e.g: + * @code + * #define INCBIN_PREFIX incbin + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols instead: + * // const unsigned char incbinFooData[]; + * // const unsigned char *const incbinFooEnd; + * // const unsigned int incbinFooSize; + * @endcode + */ +#if !defined(INCBIN_PREFIX) +# define INCBIN_PREFIX g +#endif + +/** + * @brief Specify the style used for symbol names. + * + * Possible options are + * - INCBIN_STYLE_CAMEL "CamelCase" + * - INCBIN_STYLE_SNAKE "snake_case" + * + * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char FooData[]; + * // const unsigned char *const FooEnd; + * // const unsigned int FooSize; + * @endcode + * + * If however you specify a style before including: e.g: + * @code + * #define INCBIN_STYLE INCBIN_STYLE_SNAKE + * #include "incbin.h" + * INCBIN(foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char foo_data[]; + * // const unsigned char *const foo_end; + * // const unsigned int foo_size; + * @endcode + */ +#if !defined(INCBIN_STYLE) +# define INCBIN_STYLE INCBIN_STYLE_CAMEL +#endif + +/* Style lookup tables */ +#define INCBIN_STYLE_0_DATA Data +#define INCBIN_STYLE_0_END End +#define INCBIN_STYLE_0_SIZE Size +#define INCBIN_STYLE_1_DATA _data +#define INCBIN_STYLE_1_END _end +#define INCBIN_STYLE_1_SIZE _size + +/* Style lookup: returning identifier */ +#define INCBIN_STYLE_IDENT(TYPE) \ + INCBIN_CONCATENATE( \ + 
INCBIN_STYLE_, \ + INCBIN_CONCATENATE( \ + INCBIN_EVAL(INCBIN_STYLE), \ + INCBIN_CONCATENATE(_, TYPE))) + +/* Style lookup: returning string literal */ +#define INCBIN_STYLE_STRING(TYPE) \ + INCBIN_STRINGIZE( \ + INCBIN_STYLE_IDENT(TYPE)) \ + +/* Generate the global labels by indirectly invoking the macro with our style + * type and concatenating the name against them. */ +#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ + INCBIN_INVOKE( \ + INCBIN_GLOBAL, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) \ + INCBIN_INVOKE( \ + INCBIN_TYPE, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) + +/** + * @brief Externally reference binary data included in another translation unit. + * + * Produces three external symbols that reference the binary data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name given for the binary data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const unsigned char FooData[]; + * // extern const unsigned char *const FooEnd; + * // extern const unsigned int FooSize; + * @endcode + */ +#define INCBIN_EXTERN(NAME) \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(DATA))[]; \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(END)); \ + INCBIN_EXTERNAL const unsigned int \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(SIZE)) + +/** + * @brief Include a binary file into the current translation unit. 
+ * + * Includes a binary file into the current translation unit, producing three symbols + * for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCBIN(Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const unsigned char IconData[]; + * // const unsigned char *const IconEnd; + * // const unsigned int IconSize; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#ifdef _MSC_VER +#define INCBIN(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +#define INCBIN(NAME, FILENAME) \ + __asm__(INCBIN_SECTION \ + INCBIN_GLOBAL_LABELS(NAME, DATA) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ + INCBIN_MACRO " \"" FILENAME "\"\n" \ + INCBIN_GLOBAL_LABELS(NAME, END) \ + INCBIN_ALIGN_BYTE \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ + INCBIN_BYTE "1\n" \ + INCBIN_GLOBAL_LABELS(NAME, SIZE) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ + INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ + INCBIN_ALIGN_HOST \ + ".text\n" \ + ); \ + INCBIN_EXTERN(NAME) + +#endif +#endif diff --git a/src/main.cpp b/src/main.cpp index fbad6622..f95db1c2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -35,6 +35,7 @@ int main(int argc, char* 
argv[]) { std::cout << engine_info() << std::endl; + CommandLine::init(argc, argv); UCI::init(Options); Tune::init(); PSQT::init(); diff --git a/src/misc.cpp b/src/misc.cpp index 80c436ac..3fbdea35 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -132,6 +132,7 @@ public: } // namespace + /// engine_info() returns the full name of the current Stockfish version. This /// will be either "Stockfish DD-MM-YY" (where DD-MM-YY is the date when /// the program was compiled) or "Stockfish ", depending on whether @@ -589,3 +590,61 @@ void bindThisThread(size_t idx) { #endif } // namespace WinProcGroup + +#ifdef _WIN32 +#include +#define GETCWD _getcwd +#else +#include +#define GETCWD getcwd +#endif + +namespace CommandLine { + +string argv0; // path+name of the executable binary, as given by argv[0] +string binaryDirectory; // path of the executable directory +string workingDirectory; // path of the working directory +string pathSeparator; // Separator for our current OS + +void init(int argc, char* argv[]) { + (void)argc; + string separator; + + // extract the path+name of the executable binary + argv0 = argv[0]; + +#ifdef _WIN32 + pathSeparator = "\\"; + #ifdef _MSC_VER + // Under windows argv[0] may not have the extension. Also _get_pgmptr() had + // issues in some windows 10 versions, so check returned values carefully. + char* pgmptr = nullptr; + if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) + argv0 = pgmptr; + #endif +#else + pathSeparator = "/"; +#endif + + // extract the working directory + workingDirectory = ""; + char buff[40000]; + char* cwd = GETCWD(buff, 40000); + if (cwd) + workingDirectory = cwd; + + // extract the binary directory path from argv0 + binaryDirectory = argv0; + size_t pos = binaryDirectory.find_last_of("\\/"); + if (pos == std::string::npos) + binaryDirectory = "." 
+ pathSeparator; + else + binaryDirectory.resize(pos + 1); + + // pattern replacement: "./" at the start of path is replaced by the working directory + if (binaryDirectory.find("." + pathSeparator) == 0) + binaryDirectory.replace(0, 1, workingDirectory); +} + + +} // namespace CommandLine diff --git a/src/misc.h b/src/misc.h index 8ad17b50..68b9c884 100644 --- a/src/misc.h +++ b/src/misc.h @@ -42,9 +42,7 @@ void dbg_mean_of(int v); void dbg_print(); typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds - static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); - inline TimePoint now() { return std::chrono::duration_cast (std::chrono::steady_clock::now().time_since_epoch()).count(); @@ -126,4 +124,11 @@ namespace WinProcGroup { void bindThisThread(size_t idx); } +namespace CommandLine { + void init(int argc, char* argv[]); + + extern std::string binaryDirectory; // path of the executable directory + extern std::string workingDirectory; // path of the working directory +} + #endif // #ifndef MISC_H_INCLUDED diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index e6619089..d6ac9894 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -18,7 +18,6 @@ // Code for calculating NNUE evaluation function -#include #include #include @@ -143,17 +142,12 @@ namespace Eval::NNUE { return accumulator.score; } - // Load the evaluation function file - bool load_eval_file(const std::string& evalFile) { + // Load eval, from a file stream or a memory stream + bool load_eval(std::string streamName, std::istream& stream) { Initialize(); - fileName = evalFile; - - std::ifstream stream(evalFile, std::ios::binary); - - const bool result = ReadParameters(stream); - - return result; + fileName = streamName; + return ReadParameters(stream); } // Evaluation function. Perform differential calculation. 
diff --git a/src/ucioption.cpp b/src/ucioption.cpp index ec83c7c8..5e747a7f 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -21,6 +21,7 @@ #include #include +#include "evaluate.h" #include "misc.h" #include "search.h" #include "thread.h" @@ -79,9 +80,7 @@ void init(OptionsMap& o) { o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); o["Use NNUE"] << Option(true, on_use_NNUE); - // The default must follow the format nn-[SHA256 first 12 digits].nnue - // for the build process (profile-build and fishtest) to work. - o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); + o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); } From e4ed7d3dd7b8895ce523180cb3da3ec2714050fc Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 26 Aug 2020 18:00:54 +0200 Subject: [PATCH 23/52] Cleaner make help do not print details if ARCH is an empty string. Follow up for b0b4ca17db49ed03057b5fa4ee4a12dab0e9c9e6 https://github.com/official-stockfish/Stockfish/pull/3071 No functional change --- src/Makefile | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5f363f02..9ae5a51c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -85,12 +85,17 @@ endif ### 2.1. 
General and architecture defaults +ifeq ($(ARCH),) + ARCH = x86-64-modern + help_skip_sanity = yes +endif # explicitly check for the list of supported architectures (as listed with make help), # the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true` -ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ - x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ - x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ - armv7 armv7-neon armv8 apple-silicon general-64 general-32)) +ifeq ($(ARCH), $(filter $(ARCH), \ + x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ + x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ + armv7 armv7-neon armv8 apple-silicon general-64 general-32)) SUPPORTED_ARCH=true else SUPPORTED_ARCH=false @@ -113,7 +118,6 @@ avx512 = no vnni256 = no vnni512 = no neon = no -ARCH = x86-64-modern STRIP = strip ### 2.2 Architecture specific @@ -695,11 +699,12 @@ help: @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" @echo "-------------------------------" -ifeq ($(SUPPORTED_ARCH), true) +ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true) @echo "The selected architecture $(ARCH) will enable the following configuration: " @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity else @echo "Specify a supported architecture with the ARCH option for more details" + @echo "" endif From d90d893b5eeea020b2d59d1372f5aa0a20b45412 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Fri, 28 Aug 2020 09:27:15 +0200 Subject: [PATCH 24/52] Reintroduce depth reduction Reintroduce depth reduction if the position is not in TT. 
STC https://tests.stockfishchess.org/tests/view/5f4652e85089a564a10d868c LLR: 2.97 (-2.94,2.94) {-0.25,1.25} Total: 40240 W: 4535 L: 4331 D: 31374 Ptnml(0-2): 215, 3276, 12969, 3410, 250 LTC https://tests.stockfishchess.org/tests/view/5f46ca5e5089a564a10d86f3 LLR: 2.93 (-2.94,2.94) {0.25,1.25} Total: 63096 W: 3426 L: 3188 D: 56482 Ptnml(0-2): 51, 2798, 25645, 2970, 84 closes https://github.com/official-stockfish/Stockfish/pull/3072 bench: 3611906 --- src/search.cpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index cae8a684..77447043 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -939,6 +939,12 @@ namespace { } } } + + // Step 11. If the position is not in TT, decrease depth by 2 + if ( PvNode + && depth >= 6 + && !ttMove) + depth -= 2; moves_loop: // When in check, search starts from here @@ -963,7 +969,7 @@ moves_loop: // When in check, search starts from here // Mark this node as being searched ThreadHolding th(thisThread, posKey, ss->ply); - // Step 11. Loop through all pseudo-legal moves until no moves remain + // Step 12. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) { @@ -1001,7 +1007,7 @@ moves_loop: // When in check, search starts from here // Calculate new depth for this move newDepth = depth - 1; - // Step 12. Pruning at shallow depth (~200 Elo) + // Step 13. Pruning at shallow depth (~200 Elo) if ( !rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) @@ -1059,7 +1065,7 @@ moves_loop: // When in check, search starts from here } } - // Step 13. Extensions (~75 Elo) + // Step 14. Extensions (~75 Elo) // Singular extension search (~70 Elo). 
If all moves but one fail low on a // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), @@ -1142,10 +1148,10 @@ moves_loop: // When in check, search starts from here [movedPiece] [to_sq(move)]; - // Step 14. Make the move + // Step 15. Make the move pos.do_move(move, st, givesCheck); - // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be + // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // re-searched at full depth. if ( depth >= 3 && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) @@ -1248,7 +1254,7 @@ moves_loop: // When in check, search starts from here didLMR = false; } - // Step 16. Full depth search when LMR is skipped or fails high + // Step 17. Full depth search when LMR is skipped or fails high if (doFullDepthSearch) { value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); @@ -1276,12 +1282,12 @@ moves_loop: // When in check, search starts from here value = -search(pos, ss+1, -beta, -alpha, newDepth, false); } - // Step 17. Undo move + // Step 18. Undo move pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - // Step 18. Check for a new best move + // Step 19. Check for a new best move // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. @@ -1358,7 +1364,7 @@ moves_loop: // When in check, search starts from here return VALUE_DRAW; */ - // Step 19. Check for mate and stalemate + // Step 20. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then // return a fail low score. 
From c02b3a4c7a339d212d5c6f75b3b89c926d33a800 Mon Sep 17 00:00:00 2001 From: MJZ1977 <37274752+MJZ1977@users.noreply.github.com> Date: Fri, 28 Aug 2020 12:06:36 +0200 Subject: [PATCH 25/52] Add / remove leaves from search tree ttPv add if previous leaf is in search tree and we didn't find a counter move else remove the position if the leaf is the last one in search tree. STC : https://tests.stockfishchess.org/tests/view/5f49203c3def640786115314 LLR: 2.95 (-2.94,2.94) {-0.25,1.25} Total: 29968 W: 3381 L: 3195 D: 23392 Ptnml(0-2): 146, 2432, 9671, 2560, 175 LTC : https://tests.stockfishchess.org/tests/view/5f494bea3def640786115336 LLR: 2.96 (-2.94,2.94) {0.25,1.25} Total: 84952 W: 4619 L: 4333 D: 76000 Ptnml(0-2): 86, 3765, 34481, 4065, 79 closes https://github.com/official-stockfish/Stockfish/pull/3075 Bench 3527337 --- src/search.cpp | 34 ++++++++++++++++++++++++---------- src/search.h | 1 + 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 77447043..a2342a3c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -597,7 +597,7 @@ namespace { Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; + bool ttHit, formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; Piece movedPiece; @@ -644,6 +644,7 @@ namespace { assert(0 <= ss->ply && ss->ply < MAX_PLY); (ss+1)->ply = ss->ply + 1; + (ss+1)->ttPv = false; (ss+1)->excludedMove = bestMove = MOVE_NONE; (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; Square prevSq = to_sq((ss-1)->currentMove); @@ -667,10 +668,11 @@ namespace { ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] : ttHit ? 
tte->move() : MOVE_NONE; - ttPv = PvNode || (ttHit && tte->is_pv()); - formerPv = ttPv && !PvNode; + if (!excludedMove) + ss->ttPv = PvNode || (ttHit && tte->is_pv()); + formerPv = ss->ttPv && !PvNode; - if ( ttPv + if ( ss->ttPv && depth > 12 && ss->ply - 1 < MAX_LPH && !priorCapture @@ -748,7 +750,7 @@ namespace { if ( b == BOUND_EXACT || (b == BOUND_LOWER ? value >= beta : value <= alpha)) { - tte->save(posKey, value_to_tt(value, ss->ply), ttPv, b, + tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, std::min(MAX_PLY - 1, depth + 6), MOVE_NONE, VALUE_NONE); @@ -798,7 +800,7 @@ namespace { else ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; - tte->save(posKey, VALUE_NONE, ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } // Step 7. Razoring (~1 Elo) @@ -824,7 +826,7 @@ namespace { && (ss-1)->statScore < 22977 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182 + && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -898,6 +900,8 @@ namespace { assert(probCutBeta < VALUE_INFINITE); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); int probCutCount = 0; + bool ttPv = ss->ttPv; + ss->ttPv = false; while ( (move = mp.next_move()) != MOVE_NONE && probCutCount < 2 + 2 * cutNode) @@ -938,6 +942,7 @@ namespace { return value; } } + ss->ttPv = ttPv; } // Step 11. If the position is not in TT, decrease depth by 2 @@ -1180,7 +1185,7 @@ moves_loop: // When in check, search starts from here r++; // Decrease reduction if position is or has been on the PV (~10 Elo) - if (ttPv) + if (ss->ttPv) r -= 2; if (moveCountPruning && !formerPv) @@ -1209,7 +1214,7 @@ moves_loop: // When in check, search starts from here // hence break make_move(). 
(~2 Elo) else if ( type_of(move) == NORMAL && !pos.see_ge(reverse_move(move))) - r -= 2 + ttPv - (type_of(movedPiece) == PAWN); + r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN); ss->statScore = thisThread->mainHistory[us][from_to(move)] + (*contHist[0])[movedPiece][to_sq(move)] @@ -1387,8 +1392,17 @@ moves_loop: // When in check, search starts from here if (PvNode) bestValue = std::min(bestValue, maxValue); + // If no good move is found and the previous position was ttPv, then the previous + // opponent move is probably good and the new position is added to the search tree. + if (bestValue <= alpha) + ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); + // Otherwise, a counter move has been found and if the position is the last leaf + // in the search tree, remove the position from the search tree. + else if (depth > 3) + ss->ttPv = ss->ttPv && (ss+1)->ttPv; + if (!excludedMove && !(rootNode && thisThread->pvIdx)) - tte->save(posKey, value_to_tt(bestValue, ss->ply), ttPv, + tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, bestValue >= beta ? BOUND_LOWER : PvNode && bestMove ? 
BOUND_EXACT : BOUND_UPPER, depth, bestMove, ss->staticEval); diff --git a/src/search.h b/src/search.h index 2554f3fb..79085189 100644 --- a/src/search.h +++ b/src/search.h @@ -48,6 +48,7 @@ struct Stack { int statScore; int moveCount; bool inCheck; + bool ttPv; }; From 9b5b9ec9a6a3a0e46ac00f58976887560948a7e2 Mon Sep 17 00:00:00 2001 From: VoyagerOne Date: Sat, 29 Aug 2020 21:13:05 -0400 Subject: [PATCH 26/52] QS Pruning Simplification Remove depth dependence in QS pruning STC: LLR: 2.95 (-2.94,2.94) {-1.25,0.25} Total: 40536 W: 4442 L: 4358 D: 31736 Ptnml(0-2): 209, 3330, 13118, 3390, 221 https://tests.stockfishchess.org/tests/view/5f49035b3def6407861152f9 LTC: LLR: 2.95 (-2.94,2.94) {-0.75,0.25} Total: 97104 W: 5164 L: 5130 D: 86810 Ptnml(0-2): 103, 4478, 39377, 4470, 124 https://tests.stockfishchess.org/tests/view/5f4939d53def640786115322 closes https://github.com/official-stockfish/Stockfish/pull/3077 Bench: 3865238 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index a2342a3c..b319dff5 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1585,7 +1585,7 @@ moves_loop: // When in check, search starts from here [to_sq(move)]; if ( !captureOrPromotion - && moveCount >= abs(depth) + 1 + && moveCount && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold) continue; From e0bafa1911ede61b9268e0b461a5d8856d6cd6be Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sun, 30 Aug 2020 13:58:05 +0300 Subject: [PATCH 27/52] Update parameters in classical evaluation.
Passed STC (NNUE=False): https://tests.stockfishchess.org/tests/view/5f42edfe5089a564a10d84a0 LLR: 2.96 (-2.94,2.94) {-0.25,1.25} Total: 13840 W: 2591 L: 2336 D: 8913 Ptnml(0-2): 194, 1453, 3387, 1676, 210 Passed LTC (NNUE=False): https://tests.stockfishchess.org/tests/view/5f4369795089a564a10d84d8 LLR: 2.95 (-2.94,2.94) {0.25,1.25} Total: 159744 W: 19430 L: 18850 D: 121464 Ptnml(0-2): 960, 14185, 49030, 14709, 988 closes https://github.com/official-stockfish/Stockfish/pull/3080 bench: 3736029 --- src/evaluate.cpp | 10 +++++----- src/search.cpp | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 67154751..09f36513 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -227,26 +227,26 @@ namespace { // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a // pawn protected square on rank 4 to 6 which is also safe from a pawn attack. - constexpr Score Outpost[] = { S(56, 36), S(30, 23) }; + constexpr Score Outpost[] = { S(56, 34), S(31, 23) }; // PassedRank[Rank] contains a bonus according to the rank of a passed pawn constexpr Score PassedRank[RANK_NB] = { - S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260) + S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260) }; // RookOnFile[semiopen/open] contains bonuses for each rook when there is // no (friendly) pawn on the rook file. - constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) }; + constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) }; // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to // which piece type attacks which one. Attacks on lesser pieces which are // pawn-defended are not considered. 
constexpr Score ThreatByMinor[PIECE_TYPE_NB] = { - S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161) + S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162) }; constexpr Score ThreatByRook[PIECE_TYPE_NB] = { - S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41) + S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43) }; // Assorted bonuses and penalties diff --git a/src/search.cpp b/src/search.cpp index b319dff5..e6e53e7c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -944,7 +944,7 @@ namespace { } ss->ttPv = ttPv; } - + // Step 11. If the position is not in TT, decrease depth by 2 if ( PvNode && depth >= 6 From a0afe32d16554ff3b5c74f34ae56400f35759edf Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 30 Aug 2020 18:40:49 -0700 Subject: [PATCH 28/52] Use stable sort to make sure bench with TB yields same results everywhere. std::sort() is not stable so different implementations can produce different results: use the stable version instead. Observed for '8/6k1/5r2/8/8/8/1K6/Q7 w - - 0 1' yielding different bench results for gcc and MSVC and 3-4-5 syzygy TB prior to this patch. closes https://github.com/official-stockfish/Stockfish/pull/3083 No functional change. 
--- src/search.cpp | 2 +- src/syzygy/tbprobe.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index e6e53e7c..c676bd6d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1964,7 +1964,7 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { if (RootInTB) { // Sort moves according to TB rank - std::sort(rootMoves.begin(), rootMoves.end(), + std::stable_sort(rootMoves.begin(), rootMoves.end(), [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } ); // Probe during search only if DTZ is not available and we are winning diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 20215b96..3dfe3e3e 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -758,7 +758,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu if (entry->hasPawns) { idx = LeadPawnIdx[leadPawnsCnt][squares[0]]; - std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp); + std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp); for (int i = 1; i < leadPawnsCnt; ++i) idx += Binomial[i][MapPawns[squares[i]]]; @@ -859,7 +859,7 @@ encode_remaining: while (d->groupLen[++next]) { - std::sort(groupSq, groupSq + d->groupLen[next]); + std::stable_sort(groupSq, groupSq + d->groupLen[next]); uint64_t n = 0; // Map down a square if "comes later" than a square in the previous From a057f170c6920fc4d1abdae619c5259e9d80703c Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 30 Aug 2020 20:48:10 -0700 Subject: [PATCH 29/52] Use llvm linker with clang on windows for LTO. other linkers might fail to link during the LTO phase. The linker might have to be installed using `pacman -Syu mingw-w64-x86_64-lld` closes https://github.com/official-stockfish/Stockfish/pull/3084 No functional change. 
--- src/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Makefile b/src/Makefile index 9ae5a51c..340b3008 100644 --- a/src/Makefile +++ b/src/Makefile @@ -595,6 +595,11 @@ ifeq ($(debug), no) LDFLAGS += $(CXXFLAGS) else ifeq ($(comp),clang) CXXFLAGS += -flto=thin + ifneq ($(findstring MINGW,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + else ifneq ($(findstring MSYS,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + endif LDFLAGS += $(CXXFLAGS) # GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be From 61381372ec896ae6b0f139555e6e3f816d8aa570 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Mon, 31 Aug 2020 22:53:20 +0200 Subject: [PATCH 30/52] Always print an info line before a bestmove if very few nodes are being searched before a bestmove is reported, an info line might be missing. fixes https://github.com/official-stockfish/Stockfish/issues/2757 closes https://github.com/official-stockfish/Stockfish/pull/3088 no functional change --- src/search.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c676bd6d..c15cd753 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1861,12 +1861,15 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { { bool updated = rootMoves[i].score != -VALUE_INFINITE; - if (depth == 1 && !updated) + if (depth == 1 && !updated && i > 0) continue; - Depth d = updated ? depth : depth - 1; + Depth d = updated ? depth : std::max(1, depth - 1); Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore; + if (v == -VALUE_INFINITE) + v = VALUE_ZERO; + bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; v = tb ? 
rootMoves[i].tbScore : v; From a8bbaa17954471cf3fd8d168f1cafe3f2034730e Mon Sep 17 00:00:00 2001 From: VoyagerOne Date: Sun, 30 Aug 2020 13:57:57 -0400 Subject: [PATCH 31/52] LMR Root Node Simplification Simplify LMR at Root node STC: LLR: 2.94 (-2.94,2.94) {-1.25,0.25} Total: 71520 W: 7649 L: 7614 D: 56257 Ptnml(0-2): 346, 5845, 23349, 5868, 352 https://tests.stockfishchess.org/tests/view/5f4be8c0ba100690c5cc5cbb LTC: LLR: 2.95 (-2.94,2.94) {-0.75,0.25} Total: 74832 W: 3997 L: 3948 D: 66887 Ptnml(0-2): 77, 3422, 30362, 3485, 70 https://tests.stockfishchess.org/tests/view/5f4c603eba100690c5cc5d0e closes https://github.com/official-stockfish/Stockfish/pull/3091 Bench: 3624569 --- src/search.cpp | 1 - src/thread.cpp | 11 ----------- src/thread.h | 1 - 3 files changed, 13 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c15cd753..b79fa6be 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1160,7 +1160,6 @@ moves_loop: // When in check, search starts from here // re-searched at full depth. if ( depth >= 3 && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) - && (!rootNode || thisThread->best_move_count(move) == 0) && ( !captureOrPromotion || moveCountPruning || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha diff --git a/src/thread.cpp b/src/thread.cpp index 1aa66a81..b46fce5e 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -51,17 +51,6 @@ Thread::~Thread() { } -/// Thread::bestMoveCount(Move move) return best move counter for the given root move - -int Thread::best_move_count(Move move) const { - - auto rm = std::find(rootMoves.begin() + pvIdx, - rootMoves.begin() + pvLast, move); - - return rm != rootMoves.begin() + pvLast ? 
rm->bestMoveCount : 0; -} - - /// Thread::clear() reset histories, usually before a new game void Thread::clear() { diff --git a/src/thread.h b/src/thread.h index 042bc2e9..34b99015 100644 --- a/src/thread.h +++ b/src/thread.h @@ -54,7 +54,6 @@ public: void idle_loop(); void start_searching(); void wait_for_search_finished(); - int best_move_count(Move move) const; Pawns::Table pawnsTable; Material::Table materialTable; From be87517734e2a1b222d1a35e98764382b4176732 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Tue, 1 Sep 2020 12:22:47 +0200 Subject: [PATCH 32/52] Only use MADV_RANDOM if defined needed to compile on Haiku. fixes https://github.com/official-stockfish/Stockfish/issues/3093 closes https://github.com/official-stockfish/Stockfish/pull/3094 No functional change --- src/syzygy/tbprobe.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 3dfe3e3e..4d682f1a 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -223,7 +223,9 @@ public: *mapping = statbuf.st_size; *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); +#if defined(MADV_RANDOM) madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); +#endif ::close(fd); if (*baseAddress == MAP_FAILED) From aa2de712302a2379d8aa26127d86455ad276f512 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 2 Sep 2020 08:05:08 +0200 Subject: [PATCH 33/52] Update CPU contributors list with fishtest data of Sept. 
2 2020 closes https://github.com/official-stockfish/Stockfish/pull/3095 No functional change --- Top CPU Contributors.txt | 319 +++++++++++++++++++++------------------ 1 file changed, 169 insertions(+), 150 deletions(-) diff --git a/Top CPU Contributors.txt b/Top CPU Contributors.txt index 0ea5ac72..482e9000 100644 --- a/Top CPU Contributors.txt +++ b/Top CPU Contributors.txt @@ -1,154 +1,173 @@ -Contributors with >10,000 CPU hours as of January 7, 2020 +Contributors with >10,000 CPU hours as of Sept 2, 2020 Thank you! Username CPU Hours Games played -------------------------------------------------- -noobpwnftw 9305707 695548021 -mlang 780050 61648867 -dew 621626 43921547 -mibere 524702 42238645 -crunchy 354587 27344275 -cw 354495 27274181 -fastgm 332801 22804359 -JojoM 295750 20437451 -CSU_Dynasty 262015 21828122 -Fisherman 232181 18939229 -ctoks 218866 17622052 -glinscott 201989 13780820 -tvijlbrief 201204 15337115 -velislav 188630 14348485 -gvreuls 187164 15149976 -bking_US 180289 11876016 -nordlandia 172076 13467830 -leszek 157152 11443978 -Thanar 148021 12365359 -spams 141975 10319326 -drabel 138073 11121749 -vdv 137850 9394330 -mgrabiak 133578 10454324 -TueRens 132485 10878471 -bcross 129683 11557084 -marrco 126078 9356740 -sqrt2 125830 9724586 -robal 122873 9593418 -vdbergh 120766 8926915 -malala 115926 8002293 -CoffeeOne 114241 5004100 -dsmith 113189 7570238 -BrunoBanani 104644 7436849 -Data 92328 8220352 -mhoram 89333 6695109 -davar 87924 7009424 -xoto 81094 6869316 -ElbertoOne 80899 7023771 -grandphish2 78067 6160199 -brabos 77212 6186135 -psk 75733 5984901 -BRAVONE 73875 5054681 -sunu 70771 5597972 -sterni1971 70605 5590573 -MaZePallas 66886 5188978 -Vizvezdenec 63708 4967313 -nssy 63462 5259388 -jromang 61634 4940891 -teddybaer 61231 5407666 -Pking_cda 60099 5293873 -solarlight 57469 5028306 -dv8silencer 56913 3883992 -tinker 54936 4086118 -renouve 49732 3501516 -Freja 49543 3733019 -robnjr 46972 4053117 -rap 46563 3219146 -Bobo1239 46036 3817196 
-ttruscott 45304 3649765 -racerschmacer 44881 3975413 -finfish 44764 3370515 -eva42 41783 3599691 -biffhero 40263 3111352 -bigpen0r 39817 3291647 -mhunt 38871 2691355 -ronaldjerum 38820 3240695 -Antihistamine 38785 2761312 -pb00067 38038 3086320 -speedycpu 37591 3003273 -rkl 37207 3289580 -VoyagerOne 37050 3441673 -jbwiebe 35320 2805433 -cuistot 34191 2146279 -homyur 33927 2850481 -manap 32873 2327384 -gri 32538 2515779 -oryx 31267 2899051 -EthanOConnor 30959 2090311 -SC 30832 2730764 -csnodgrass 29505 2688994 -jmdana 29458 2205261 -strelock 28219 2067805 -jkiiski 27832 1904470 -Pyafue 27533 1902349 -Garf 27515 2747562 -eastorwest 27421 2317535 -slakovv 26903 2021889 -Prcuvu 24835 2170122 -anst 24714 2190091 -hyperbolic.tom 24319 2017394 -Patrick_G 23687 1801617 -Sharaf_DG 22896 1786697 -nabildanial 22195 1519409 -chriswk 21931 1868317 -achambord 21665 1767323 -Zirie 20887 1472937 -team-oh 20217 1636708 -Isidor 20096 1680691 -ncfish1 19931 1520927 -nesoneg 19875 1463031 -Spprtr 19853 1548165 -JanErik 19849 1703875 -agg177 19478 1395014 -SFTUser 19231 1567999 -xor12 19017 1680165 -sg4032 18431 1641865 -rstoesser 18118 1293588 -MazeOfGalious 17917 1629593 -j3corre 17743 941444 -cisco2015 17725 1690126 -ianh2105 17706 1632562 -dex 17678 1467203 -jundery 17194 1115855 -iisiraider 17019 1101015 -horst.prack 17012 1465656 -Adrian.Schmidt123 16563 1281436 -purplefishies 16342 1092533 -wei 16274 1745989 -ville 16144 1384026 -eudhan 15712 1283717 -OuaisBla 15581 972000 -DragonLord 15559 1162790 -dju 14716 875569 -chris 14479 1487385 -0xB00B1ES 14079 1001120 -OssumOpossum 13776 1007129 -enedene 13460 905279 -bpfliegel 13346 884523 -Ente 13198 1156722 -IgorLeMasson 13087 1147232 -jpulman 13000 870599 -ako027ako 12775 1173203 -Nikolay.IT 12352 1068349 -Andrew Grant 12327 895539 -joster 12008 950160 -AdrianSA 11996 804972 -Nesa92 11455 1111993 -fatmurphy 11345 853210 -Dark_wizzie 11108 1007152 -modolief 10869 896470 -mschmidt 10757 803401 -infinity 10594 727027 -mabichito 10524 
749391 -Thomas A. Anderson 10474 732094 -thijsk 10431 719357 -Flopzee 10339 894821 -crocogoat 10104 1013854 -SapphireBrand 10104 969604 -stocky 10017 699440 +noobpwnftw 19352969 1231459677 +mlang 957168 61657446 +dew 949885 56893432 +mibere 703817 46865007 +crunchy 427035 27344275 +cw 416006 27521077 +JojoM 415904 24479564 +fastgm 404873 23953472 +CSU_Dynasty 335774 22850550 +tvijlbrief 335199 21871270 +Fisherman 325053 21786603 +gvreuls 311480 20751516 +ctoks 275877 18710423 +velislav 241267 15596372 +glinscott 217799 13780820 +nordlandia 211692 13484886 +bcross 206213 14934233 +bking_US 198894 11876016 +leszek 189170 11446821 +mgrabiak 183896 11778092 +drabel 181408 12489478 +TueRens 181349 12192000 +Thanar 179852 12365359 +vdv 175171 9881246 +robal 166948 10702862 +spams 157128 10319326 +marrco 149947 9376421 +sqrt2 147963 9724586 +vdbergh 137041 8926915 +CoffeeOne 136294 5004100 +malala 136182 8002293 +mhoram 128934 8177193 +davar 122092 7960001 +dsmith 122059 7570238 +xoto 119696 8222144 +grandphish2 116481 7582197 +Data 113305 8220352 +BrunoBanani 112960 7436849 +ElbertoOne 99028 7023771 +MaZePallas 98571 6362619 +brabos 92118 6186135 +psk 89957 5984901 +sunu 88463 6007033 +sterni1971 86948 5613788 +Vizvezdenec 83752 5343724 +BRAVONE 81239 5054681 +nssy 76497 5259388 +teddybaer 75125 5407666 +Pking_cda 73776 5293873 +jromang 70695 4940891 +solarlight 70517 5028306 +dv8silencer 70287 3883992 +Bobo1239 68515 4652287 +racerschmacer 67468 4935996 +manap 66273 4121774 +tinker 63458 4213726 +linrock 59082 4516053 +robnjr 57262 4053117 +Freja 56938 3733019 +ttruscott 56005 3679485 +renouve 53811 3501516 +cuistot 52532 3014920 +finfish 51360 3370515 +eva42 51272 3599691 +rkl 50759 3840947 +rap 49985 3219146 +pb00067 49727 3298270 +ronaldjerum 47654 3240695 +bigpen0r 47278 3291647 +biffhero 46564 3111352 +VoyagerOne 45386 3445881 +speedycpu 43842 3003273 +jbwiebe 43305 2805433 +Antihistamine 41788 2761312 +mhunt 41735 2691355 +eastorwest 40387 2812173 +homyur 39893 
2850481 +gri 39871 2515779 +oryx 38228 2941656 +0x3C33 37773 2529097 +SC 37290 2731014 +csnodgrass 36207 2688994 +jmdana 36108 2205261 +strelock 34716 2074055 +Garf 33800 2747562 +EthanOConnor 33370 2090311 +slakovv 32915 2021889 +Spprtr 32591 2139601 +Prcuvu 30377 2170122 +anst 30301 2190091 +jkiiski 30136 1904470 +hyperbolic.tom 29840 2017394 +Pyafue 29650 1902349 +OuaisBla 27629 1578000 +chriswk 26902 1868317 +achambord 26582 1767323 +Patrick_G 26276 1801617 +yorkman 26193 1992080 +SFTUser 25182 1675689 +nabildanial 24942 1519409 +Sharaf_DG 24765 1786697 +ncfish1 24411 1520927 +agg177 23890 1395014 +JanErik 23408 1703875 +Isidor 23388 1680691 +Norabor 22976 1587862 +cisco2015 22880 1759669 +Zirie 22542 1472937 +team-oh 22272 1636708 +MazeOfGalious 21978 1629593 +sg4032 21945 1643065 +ianh2105 21725 1632562 +xor12 21628 1680365 +dex 21612 1467203 +nesoneg 21494 1463031 +horst.prack 20878 1465656 +0xB00B1ES 20590 1208666 +j3corre 20405 941444 +Adrian.Schmidt123 20316 1281436 +wei 19973 1745989 +rstoesser 19569 1293588 +eudhan 19274 1283717 +Ente 19070 1373058 +jundery 18445 1115855 +iisiraider 18247 1101015 +ville 17883 1384026 +chris 17698 1487385 +purplefishies 17595 1092533 +DragonLord 17014 1162790 +dju 16515 929427 +IgorLeMasson 16064 1147232 +ako027ako 15671 1173203 +Nikolay.IT 15154 1068349 +Andrew Grant 15114 895539 +yurikvelo 15027 1165616 +OssumOpossum 14857 1007129 +enedene 14476 905279 +bpfliegel 14298 884523 +jpulman 13982 870599 +joster 13794 950160 +Nesa92 13786 1114691 +Dark_wizzie 13422 1007152 +Hjax 13350 900887 +Fifis 13313 965473 +mabichito 12903 749391 +thijsk 12886 722107 +crocogoat 12876 1048802 +AdrianSA 12860 804972 +Flopzee 12698 894821 +fatmurphy 12547 853210 +SapphireBrand 12416 969604 +modolief 12386 896470 +scuzzi 12362 833465 +pgontarz 12151 848794 +stocky 11954 699440 +mschmidt 11941 803401 +infinity 11470 727027 +torbjo 11387 728873 +Thomas A. 
Anderson 11372 732094 +snicolet 11106 869170 +amicic 10779 733593 +rpngn 10712 688203 +d64 10680 771144 +basepi 10637 744851 +jjoshua2 10559 670905 +dzjp 10343 732529 +ols 10259 570669 +lbraesch 10252 647825 From c306d838697011da0a960758dde3f7ede6849060 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 2 Sep 2020 09:12:04 +0200 Subject: [PATCH 34/52] Stockfish 12 Official release version of Stockfish 12 Bench: 3624569 ----------------------- It is our pleasure to release Stockfish 12 to users world-wide Downloads will be freely available at https://stockfishchess.org/download/ This version 12 of Stockfish plays significantly stronger than any of its predecessors. In a match against Stockfish 11, Stockfish 12 will typically win at least ten times more game pairs than it loses. This jump in strength, visible in regular progression tests during development[1], results from the introduction of an efficiently updatable neural network (NNUE) for the evaluation in Stockfish[2], and associated tuning of the engine as a whole. The concept of the NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward. Stockfish remains a CPU-only engine, since the NNUE networks can be very efficiently evaluated on CPUs. The recommended parameters of the NNUE network are embedded in distributed binaries, and Stockfish will use NNUE by default. Both the NNUE and the classical evaluations are available, and can be used to assign values to positions that are later used in alpha-beta (PVS) search to find the best move. The classical evaluation computes this value as a function of various chess concepts, handcrafted by experts, tested and tuned using fishtest. The NNUE evaluation computes this value with a neural network based on basic inputs. The network is optimized and trained on the evaluations of millions of positions. 
The Stockfish project builds on a thriving community of enthusiasts that contribute their expertise, time, and resources to build a free and open source chess engine that is robust, widely available, and very strong. We invite chess fans to join the fishtest testing framework and programmers to contribute on github[3]. Stay safe and enjoy chess! The Stockfish team [1] https://github.com/glinscott/fishtest/wiki/Regression-Tests [2] https://github.com/official-stockfish/Stockfish/commit/84f3e867903f62480c33243dd0ecbffd342796fc [3] https://stockfishchess.org/get-involved/ --- src/misc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.cpp b/src/misc.cpp index 3fbdea35..22070f0e 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -65,7 +65,7 @@ namespace { /// Version number. If Version is left empty, then compile date in the format /// DD-MM-YY and show in engine_info. -const string Version = ""; +const string Version = "12"; /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We From 571c2d6d8daf70de884c493b40cf0279e9b48c61 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Fri, 4 Sep 2020 07:46:06 +0200 Subject: [PATCH 35/52] Restore development version have fun! No functional change --- src/misc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.cpp b/src/misc.cpp index 22070f0e..3fbdea35 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -65,7 +65,7 @@ namespace { /// Version number. If Version is left empty, then compile date in the format /// DD-MM-YY and show in engine_info. -const string Version = "12"; +const string Version = ""; /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. 
We From 0e1f734b05ee5c67e9a17ae0e2045a64209dee05 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Wed, 2 Sep 2020 16:45:49 +0200 Subject: [PATCH 36/52] Less pruning in qsearch do not prune moves that give discovery checks, even if with negative SEE. STC https://tests.stockfishchess.org/tests/view/5f4cb5e8ba100690c5cc5d25 LLR: 2.96 (-2.94,2.94) {-0.25,1.25} Total: 91328 W: 9940 L: 9667 D: 71721 Ptnml(0-2): 491, 7345, 29693, 7670, 465 LTC https://tests.stockfishchess.org/tests/view/5f4dbc2eba100690c5cc5dac LLR: 2.97 (-2.94,2.94) {0.25,1.25} Total: 52448 W: 2799 L: 2586 D: 47063 Ptnml(0-2): 53, 2220, 21459, 2445, 47 closes https://github.com/official-stockfish/Stockfish/pull/3098 bench: 4031192 --- src/search.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index b79fa6be..0d823c8e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1564,7 +1564,9 @@ moves_loop: // When in check, search starts from here } // Do not search moves with negative SEE values - if (!ss->inCheck && !pos.see_ge(move)) + if ( !ss->inCheck + && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move)) + && !pos.see_ge(move)) continue; // Speculative prefetch as early as possible From d6530f7d49ef45e38dacafd8a3a838130113265c Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Thu, 3 Sep 2020 12:18:42 +0200 Subject: [PATCH 37/52] Simplify singularQuietLMR remove formerPV dependence STC https://tests.stockfishchess.org/tests/view/5f4cb922ba100690c5cc5d35 LLR: 2.96 (-2.94,2.94) {-1.25,0.25} Total: 113672 W: 12347 L: 12368 D: 88957 Ptnml(0-2): 566, 9537, 36699, 9420, 614 LTC https://tests.stockfishchess.org/tests/view/5f4e8474ba100690c5cc5e12 LLR: 2.93 (-2.94,2.94) {-0.75,0.25} Total: 43032 W: 2298 L: 2227 D: 38507 Ptnml(0-2): 45, 1940, 17475, 2011, 45 closes https://github.com/official-stockfish/Stockfish/pull/3102 bench: 3290084 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp
b/src/search.cpp index 0d823c8e..b5e190c8 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1196,7 +1196,7 @@ moves_loop: // When in check, search starts from here // Decrease reduction if ttMove has been singularly extended (~3 Elo) if (singularQuietLMR) - r -= 1 + formerPv; + r--; if (!captureOrPromotion) { From 2a696115094882b7dc5c024a97ed7dc2bdc98642 Mon Sep 17 00:00:00 2001 From: VoyagerOne Date: Wed, 2 Sep 2020 16:58:44 -0400 Subject: [PATCH 38/52] LMR Simplification remove reduction at non-check cut nodes for second move at low depths STC: LLR: 2.95 (-2.94,2.94) {-1.25,0.25} Total: 61712 W: 6594 L: 6543 D: 48575 Ptnml(0-2): 293, 5085, 20082, 5070, 326 https://tests.stockfishchess.org/tests/view/5f5007d6ba100690c5cc5ea9 LTC: LLR: 2.94 (-2.94,2.94) {-0.75,0.25} Total: 57544 W: 2983 L: 2925 D: 51636 Ptnml(0-2): 47, 2568, 23495, 2604, 58 https://tests.stockfishchess.org/tests/view/5f50c597ba100690c5cc5ef7 closes https://github.com/official-stockfish/Stockfish/pull/3103 Bench: 3952302 --- src/search.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index b5e190c8..a7692841 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1168,13 +1168,6 @@ moves_loop: // When in check, search starts from here { Depth r = reduction(improving, depth, moveCount); - // Decrease reduction at non-check cut nodes for second move at low depths - if ( cutNode - && depth <= 10 - && moveCount <= 2 - && !ss->inCheck) - r--; - // Decrease reduction if the ttHit running average is large if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; From 9cc482c7889cbc5f6d92e1b69ccd28d422a44a32 Mon Sep 17 00:00:00 2001 From: Sergio Vieri Date: Thu, 3 Sep 2020 20:22:51 +0800 Subject: [PATCH 39/52] Update default net to nn-308d71810dff.nnue equivalent to 20200903-1739 Net trained from scratch, so it has quite different features extracted compared to the previous net (82215d0fd0df). 
STC: LLR: 2.98 (-2.94,2.94) {-0.25,1.25} Total: 108328 W: 14048 L: 13719 D: 80561 Ptnml(0-2): 842, 10039, 32062, 10390, 831 https://tests.stockfishchess.org/tests/view/5f50e053ba100690c5cc5f00 LTC: LLR: 2.96 (-2.94,2.94) {0.25,1.25} Total: 13872 W: 1059 L: 890 D: 11923 Ptnml(0-2): 30, 724, 5270, 871, 41 https://tests.stockfishchess.org/tests/view/5f51821fba100690c5cc5f36 closes https://github.com/official-stockfish/Stockfish/pull/3104 Bench: 3832716 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index d701f5a7..3da6a9fe 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -38,7 +38,7 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-82215d0fd0df.nnue" + #define EvalFileDefaultName "nn-308d71810dff.nnue" namespace NNUE { From 9a063fc3cbc8f522215392db232eeb0e04e71b2c Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Fri, 4 Sep 2020 15:53:59 +0300 Subject: [PATCH 40/52] Adjust penalty on refuted early quiet moves This patch changes how previous early moves are penalized in case search finds a best move. Here, the first quiet move that was not a transposition table move is penalized. 
passed STC https://tests.stockfishchess.org/tests/view/5f51d839ba100690c5cc5f69 LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 10088 W: 1150 L: 997 D: 7941 Ptnml(0-2): 41, 772, 3278, 899, 54 passed LTC https://tests.stockfishchess.org/tests/view/5f51e435ba100690c5cc5f76 LLR: 2.93 (-2.94,2.94) {0.25,1.25} Total: 30808 W: 1564 L: 1405 D: 27839 Ptnml(0-2): 19, 1245, 12717, 1404, 19 closes https://github.com/official-stockfish/Stockfish/pull/3106 bench 3983758 --- src/search.cpp | 42 +++++++++++++++++++++--------------------- src/search.h | 1 + 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index a7692841..4aeadc28 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -597,7 +597,7 @@ namespace { Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool ttHit, formerPv, givesCheck, improving, didLMR, priorCapture; + bool formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; Piece movedPiece; @@ -664,12 +664,12 @@ namespace { // position key in case of an excluded move. excludedMove = ss->excludedMove; posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] - : ttHit ? tte->move() : MOVE_NONE; + : ss->ttHit ? 
tte->move() : MOVE_NONE; if (!excludedMove) - ss->ttPv = PvNode || (ttHit && tte->is_pv()); + ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); formerPv = ss->ttPv && !PvNode; if ( ss->ttPv @@ -681,11 +681,11 @@ namespace { // thisThread->ttHitAverage can be used to approximate the running average of ttHit thisThread->ttHitAverage = (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow - + TtHitAverageResolution * ttHit; + + TtHitAverageResolution * ss->ttHit; // At non-PV nodes we check for an early TT cutoff if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= depth && ttValue != VALUE_NONE // Possible in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -778,7 +778,7 @@ namespace { improving = false; goto moves_loop; } - else if (ttHit) + else if (ss->ttHit) { // Never assume anything about values stored in TT ss->staticEval = eval = tte->eval(); @@ -882,14 +882,14 @@ namespace { // there and in further interactions with transposition table cutoff depth is set to depth - 3 // because probCut search has depth set to depth - 4 but we also do a move before it // so effective depth is equal to depth - 3 - && !( ttHit + && !( ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { // if ttMove is a capture and value from transposition table is good enough produce probCut // cutoff without digging into actual probCut search - if ( ttHit + if ( ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue >= probCutBeta @@ -933,7 +933,7 @@ namespace { if (value >= probCutBeta) { // if transposition table doesn't have equal or more deep info write probCut data into it - if ( !(ttHit + if ( !(ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE)) tte->save(posKey, value_to_tt(value, ss->ply), ttPv, @@ -1423,7 +1423,7 @@ moves_loop: // When in check, search starts from here Move ttMove, move, bestMove; Depth ttDepth; Value bestValue, value, ttValue, 
futilityValue, futilityBase, oldAlpha; - bool ttHit, pvHit, givesCheck, captureOrPromotion; + bool pvHit, givesCheck, captureOrPromotion; int moveCount; if (PvNode) @@ -1453,13 +1453,13 @@ moves_loop: // When in check, search starts from here : DEPTH_QS_NO_CHECKS; // Transposition table lookup posKey = pos.key(); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ttHit ? tte->move() : MOVE_NONE; - pvHit = ttHit && tte->is_pv(); + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = ss->ttHit ? tte->move() : MOVE_NONE; + pvHit = ss->ttHit && tte->is_pv(); if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= ttDepth && ttValue != VALUE_NONE // Only in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -1474,7 +1474,7 @@ moves_loop: // When in check, search starts from here } else { - if (ttHit) + if (ss->ttHit) { // Never assume anything about values stored in TT if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) @@ -1493,7 +1493,7 @@ moves_loop: // When in check, search starts from here // Stand pat. 
Return immediately if static value is at least beta if (bestValue >= beta) { - if (!ttHit) + if (!ss->ttHit) tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_NONE, MOVE_NONE, ss->staticEval); @@ -1711,8 +1711,8 @@ moves_loop: // When in check, search starts from here else captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1; - // Extra penalty for a quiet TT or main killer move in previous ply when it gets refuted - if ( ((ss-1)->moveCount == 1 || ((ss-1)->currentMove == (ss-1)->killers[0])) + // Extra penalty for a quiet early move that was not a TT move or main killer move in previous ply when it gets refuted + if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) && !pos.captured_piece()) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1); diff --git a/src/search.h b/src/search.h index 79085189..f60da4a5 100644 --- a/src/search.h +++ b/src/search.h @@ -49,6 +49,7 @@ struct Stack { int moveCount; bool inCheck; bool ttPv; + bool ttHit; }; From d539da19d2b13d70a81ab863f54046add0bc3b38 Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Fri, 4 Sep 2020 17:14:50 +0800 Subject: [PATCH 41/52] Use classical eval more often If there is a moderate imbalance, use classical eval with small probability (1/16), as derived from the node counter. 
STC: LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 32320 W: 3562 L: 3377 D: 25381 Ptnml(0-2): 144, 2609, 10478, 2776, 153 https://tests.stockfishchess.org/tests/view/5f520615ba100690c5cc5f80 LTC: LLR: 2.95 (-2.94,2.94) {0.25,1.25} Total: 21032 W: 1116 L: 974 D: 18942 Ptnml(0-2): 20, 837, 8664, 971, 24 https://tests.stockfishchess.org/tests/view/5f522eaaba100690c5cc5f8c closes https://github.com/official-stockfish/Stockfish/pull/3107 Bench: 4109324 --- src/evaluate.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 09f36513..db8379da 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1015,12 +1015,16 @@ make_v: Value Eval::evaluate(const Position& pos) { + bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); bool classical = !Eval::useNNUE - || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); + || useClassical + || (abs(eg_value(pos.psq_score())) > PawnValueMg / 8 && !(pos.this_thread()->nodes & 0xF)); Value v = classical ? Evaluation(pos).value() : NNUE::evaluate(pos) * 5 / 4 + Tempo; - if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) + if ( useClassical + && Eval::useNNUE + && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) v = NNUE::evaluate(pos) * 5 / 4 + Tempo; // Damp down the evaluation linearly when shuffling From fc27d158c012341593518a05abf51903ecbcb495 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Sun, 6 Sep 2020 17:29:12 +0200 Subject: [PATCH 42/52] Bug fix in do_null_move() and NNUE simplification. This fixes #3108 and removes some NNUE code that is currently not used. At the moment, do_null_move() copies the accumulator from the previous state into the new state, which is correct. 
It then clears the "computed_score" flag because the side to move has changed, and with the other side to move NNUE will return a completely different evaluation (normally with changed sign but also with different NNUE-internal tempo bonus). The problem is that do_null_move() clears the wrong flag. It clears the computed_score flag of the old state, not of the new state. It turns out that this almost never affects the search. For example, fixing it does not change the current bench (but it does change the previous bench). This is because the search code usually avoids calling evaluate() after a null move. This PR corrects do_null_move() by removing the computed_score flag altogether. The flag is not needed because nnue_evaluate() is never called twice on a position. This PR also removes some unnecessary {}s and inserts a few blank lines in the modified NNUE files in line with SF coding style. Result of STC non-regression test: LLR: 2.95 (-2.94,2.94) {-1.25,0.25} Total: 26328 W: 3118 L: 3012 D: 20198 Ptnml(0-2): 126, 2208, 8397, 2300, 133 https://tests.stockfishchess.org/tests/view/5f553ccc2d02727c56b36db1 closes https://github.com/official-stockfish/Stockfish/pull/3109 bench: 4109324 --- src/nnue/evaluate_nnue.cpp | 38 ++----------------- src/nnue/nnue_accumulator.h | 2 - src/nnue/nnue_feature_transformer.h | 58 +++++++++++++---------------- src/position.cpp | 2 - 4 files changed, 29 insertions(+), 71 deletions(-) diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index d6ac9894..ed138881 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -115,31 +115,16 @@ namespace Eval::NNUE { return stream && stream.peek() == std::ios::traits_type::eof(); } - // Proceed with the difference calculation if possible - static void UpdateAccumulatorIfPossible(const Position& pos) { - - feature_transformer->UpdateAccumulatorIfPossible(pos); - } - - // Calculate the evaluation value - static Value ComputeScore(const Position& pos, bool 
refresh) { - - auto& accumulator = pos.state()->accumulator; - if (!refresh && accumulator.computed_score) { - return accumulator.score; - } + // Evaluation function. Perform differential calculation. + Value evaluate(const Position& pos) { alignas(kCacheLineSize) TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize]; - feature_transformer->Transform(pos, transformed_features, refresh); + feature_transformer->Transform(pos, transformed_features); alignas(kCacheLineSize) char buffer[Network::kBufferSize]; const auto output = network->Propagate(transformed_features, buffer); - auto score = static_cast(output[0] / FV_SCALE); - - accumulator.score = score; - accumulator.computed_score = true; - return accumulator.score; + return static_cast(output[0] / FV_SCALE); } // Load eval, from a file stream or a memory stream @@ -150,19 +135,4 @@ namespace Eval::NNUE { return ReadParameters(stream); } - // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos) { - return ComputeScore(pos, false); - } - - // Evaluation function. Perform full calculation. 
- Value compute_eval(const Position& pos) { - return ComputeScore(pos, true); - } - - // Proceed with the difference calculation if possible - void update_eval(const Position& pos) { - UpdateAccumulatorIfPossible(pos); - } - } // namespace Eval::NNUE diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 69dfaad2..26370710 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -29,9 +29,7 @@ namespace Eval::NNUE { struct alignas(kCacheLineSize) Accumulator { std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - Value score; bool computed_accumulation; - bool computed_score; }; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 43707610..2b6259c3 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -50,11 +50,13 @@ namespace Eval::NNUE { // Hash value embedded in the evaluation file static constexpr std::uint32_t GetHashValue() { + return RawFeatures::kHashValue ^ kOutputDimensions; } // Read network parameters bool ReadParameters(std::istream& stream) { + for (std::size_t i = 0; i < kHalfDimensions; ++i) biases_[i] = read_little_endian(stream); for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) @@ -64,23 +66,26 @@ namespace Eval::NNUE { // Proceed with the difference calculation if possible bool UpdateAccumulatorIfPossible(const Position& pos) const { + const auto now = pos.state(); - if (now->accumulator.computed_accumulation) { + if (now->accumulator.computed_accumulation) return true; - } + const auto prev = now->previous; if (prev && prev->accumulator.computed_accumulation) { UpdateAccumulator(pos); return true; } + return false; } // Convert input features - void Transform(const Position& pos, OutputType* output, bool refresh) const { - if (refresh || !UpdateAccumulatorIfPossible(pos)) { + void Transform(const Position& pos, OutputType* output) const { + + 
if (!UpdateAccumulatorIfPossible(pos)) RefreshAccumulator(pos); - } + const auto& accumulation = pos.state()->accumulator.accumulation; #if defined(USE_AVX2) @@ -177,6 +182,7 @@ namespace Eval::NNUE { private: // Calculate cumulative value without using difference calculation void RefreshAccumulator(const Position& pos) const { + auto& accumulator = pos.state()->accumulator; IndexType i = 0; Features::IndexList active_indices[2]; @@ -216,9 +222,8 @@ namespace Eval::NNUE { &accumulator.accumulation[perspective][i][0]); auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - } #elif defined(USE_NEON) auto accumulation = reinterpret_cast( @@ -240,11 +245,11 @@ namespace Eval::NNUE { #endif accumulator.computed_accumulation = true; - accumulator.computed_score = false; } // Calculate cumulative value using difference calculation void UpdateAccumulator(const Position& pos) const { + const auto prev_accumulator = pos.state()->previous->accumulator; auto& accumulator = pos.state()->accumulator; IndexType i = 0; @@ -288,33 +293,27 @@ namespace Eval::NNUE { #if defined(USE_AVX2) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); - } #elif defined(USE_SSE2) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); - } #elif defined(USE_MMX) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]); - } #elif 
defined(USE_NEON) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = vsubq_s16(accumulation[j], column[j]); - } #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] -= - weights_[offset + j]; - } + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; #endif } @@ -325,33 +324,27 @@ namespace Eval::NNUE { #if defined(USE_AVX2) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - } #elif defined(USE_SSE2) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - } #elif defined(USE_MMX) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - } #elif defined(USE_NEON) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] += - weights_[offset + j]; - } + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; #endif } @@ -362,7 +355,6 @@ namespace Eval::NNUE { #endif accumulator.computed_accumulation = true; - accumulator.computed_score = false; } using BiasType = std::int16_t; diff --git a/src/position.cpp b/src/position.cpp index fe89b753..e6a760d2 100644 --- a/src/position.cpp +++ 
b/src/position.cpp @@ -704,7 +704,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Used by NNUE st->accumulator.computed_accumulation = false; - st->accumulator.computed_score = false; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -1000,7 +999,6 @@ void Position::do_null_move(StateInfo& newSt) { if (Eval::useNNUE) { std::memcpy(&newSt, st, sizeof(StateInfo)); - st->accumulator.computed_score = false; } else std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); From d2562cde12cdcc3df654279d6d632ae74c5f71af Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Tue, 8 Sep 2020 15:37:53 +0200 Subject: [PATCH 43/52] Always re-enable NNUE after "bench". Restore the default NNUE setting (enabled) after a bench command. This also makes the resulting program settings independent of the number of FENs that are being benched. Fixes issue #3112. closes https://github.com/official-stockfish/Stockfish/pull/3113 No functional change. --- src/benchmark.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 806e9840..ffb631a2 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -164,5 +164,7 @@ vector setup_bench(const Position& current, istream& is) { ++posCounter; } + list.emplace_back("setoption name Use NNUE value true"); + return list; } From 0405f3540366cc16245d51531881c55d3726c8b5 Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Mon, 7 Sep 2020 04:54:26 +0800 Subject: [PATCH 44/52] Double probability of using classical eval This patch doubles the moderate imbalance threshold and probability of using classical eval. So now if imbalance is greater than PawnValueMg / 4 then there is a 1/8 chance of using classical eval. 
STC: LLR: 2.93 (-2.94,2.94) {-0.25,1.25} Total: 10984 W: 1303 L: 1140 D: 8541 Ptnml(0-2): 58, 867, 3489, 1010, 68 https://tests.stockfishchess.org/tests/view/5f554c9f97da2d5437d3813e LTC: LLR: 2.95 (-2.94,2.94) {0.25,1.25} Total: 43064 W: 2476 L: 2276 D: 38312 Ptnml(0-2): 37, 1985, 17308, 2145, 57 https://tests.stockfishchess.org/tests/view/5f55690a00a0aa2ca79f0a43 closes https://github.com/official-stockfish/Stockfish/pull/3114 Bench: 4161067 --- src/evaluate.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index db8379da..faf71d27 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1015,10 +1015,13 @@ make_v: Value Eval::evaluate(const Position& pos) { + // Use classical eval if there is a large imbalance + // If there is a moderate imbalance, use classical eval with probability (1/8), + // as derived from the node counter. bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); bool classical = !Eval::useNNUE || useClassical - || (abs(eg_value(pos.psq_score())) > PawnValueMg / 8 && !(pos.this_thread()->nodes & 0xF)); + || (abs(eg_value(pos.psq_score())) > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); Value v = classical ? Evaluation(pos).value() : NNUE::evaluate(pos) * 5 / 4 + Tempo; From 35ab8254b70f62a4e0138c475fad0c77dcc0af2d Mon Sep 17 00:00:00 2001 From: mckx00 Date: Sun, 13 Sep 2020 19:28:32 -0700 Subject: [PATCH 45/52] Simplify StatSCore Initialization No need to initialize StatScore at rootNode. Current Logic is redundant because at subsequent levels the grandchildren statScore is initialized to zero. closes https://github.com/official-stockfish/Stockfish/pull/3122 Non functional change. 
--- src/search.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 4aeadc28..07c491b6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -654,9 +654,7 @@ namespace { // starts with statScore = 0. Later grandchildren start with the last calculated // statScore of the previous grandchild. This influences the reduction rules in // LMR which are based on the statScore of parent position. - if (rootNode) - (ss+4)->statScore = 0; - else + if (!rootNode) (ss+2)->statScore = 0; // Step 4. Transposition table lookup. We don't want the score of a partial From 7135678f71b7f6ee32e92b8dbef2b16b403d8ea9 Mon Sep 17 00:00:00 2001 From: Sergio Vieri Date: Mon, 14 Sep 2020 17:24:05 +0800 Subject: [PATCH 46/52] Update default net to nn-03744f8d56d8.nnue Equivalent to 20200914-1520 closes https://github.com/official-stockfish/Stockfish/pull/3123 Bench: 4222126 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index 3da6a9fe..c723bd8f 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -38,7 +38,7 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-308d71810dff.nnue" + #define EvalFileDefaultName "nn-03744f8d56d8.nnue" namespace NNUE { From 5f426d8667feda65eaf1eca699f629d31e170d43 Mon Sep 17 00:00:00 2001 From: xoto10 Date: Thu, 10 Sep 2020 21:10:57 +0100 Subject: [PATCH 47/52] Use 2 * bestMoveChanges. NNUE appears to provide a more stable eval than the classic eval, so the time use dependencies on bestMoveChanges, fallingEval, etc may need to change to make the best use of available time. This change doubles the effect of totBestMoveChanges when giving more time because the choice of best move is unstable. 
STC: LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 101928 W: 11995 L: 11698 D: 78235 Elo +0.78 Ptnml(0-2): 592, 8707, 32103, 8936, 626 https://tests.stockfishchess.org/tests/view/5f538a462d02727c56b36cec LTC: LLR: 2.94 (-2.94,2.94) {0.25,1.25} Total: 186392 W: 10383 L: 9877 D: 166132 Elo +0.81 Ptnml(0-2): 207, 8370, 75539, 8870, 210 https://tests.stockfishchess.org/tests/view/5f54a9712d02727c56b36d5a closes https://github.com/official-stockfish/Stockfish/pull/3119 Bench 4222126 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 07c491b6..c7d2efd4 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -520,7 +520,7 @@ void Thread::search() { totBestMoveChanges += th->bestMoveChanges; th->bestMoveChanges = 0; } - double bestMoveInstability = 1 + totBestMoveChanges / Threads.size(); + double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size(); double totalTime = rootMoves.size() == 1 ? 0 : Time.optimum() * fallingEval * reduction * bestMoveInstability; From d86663af141f1256bfc32ab95891e944d84e8755 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Sun, 13 Sep 2020 20:16:52 +0200 Subject: [PATCH 48/52] Improve NDK section in Makefile This PR sets the "comp" variable simply to "clang", which seems to be more consistent and allows a small simplification. The PR also moves the section that sets "profile_make" and "profile_use" to after the NDK section, which ensures that these variables are now set correctly for NDK/clang. 
closes https://github.com/official-stockfish/Stockfish/pull/3121 No functional change --- src/Makefile | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/src/Makefile b/src/Makefile index 340b3008..54868b39 100644 --- a/src/Makefile +++ b/src/Makefile @@ -381,19 +381,6 @@ ifeq ($(COMP),clang) endif endif -ifeq ($(comp),icc) - profile_make = icc-profile-make - profile_use = icc-profile-use -else -ifeq ($(comp),clang) - profile_make = clang-profile-make - profile_use = clang-profile-use -else - profile_make = gcc-profile-make - profile_use = gcc-profile-use -endif -endif - ifeq ($(KERNEL),Darwin) CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 @@ -405,20 +392,30 @@ endif # Currently we don't know how to make PGO builds with the NDK yet. ifeq ($(COMP),ndk) CXXFLAGS += -stdlib=libc++ -fPIE + comp=clang ifeq ($(arch),armv7) - comp=armv7a-linux-androideabi16-clang CXX=armv7a-linux-androideabi16-clang++ CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon STRIP=arm-linux-androideabi-strip endif ifeq ($(arch),armv8) - comp=aarch64-linux-android21-clang CXX=aarch64-linux-android21-clang++ STRIP=aarch64-linux-android-strip endif LDFLAGS += -static-libstdc++ -pie -lm -latomic endif +ifeq ($(comp),icc) + profile_make = icc-profile-make + profile_use = icc-profile-use +else ifeq ($(comp),clang) + profile_make = clang-profile-make + profile_use = clang-profile-use +else + profile_make = gcc-profile-make + profile_use = gcc-profile-use +endif + ### Travis CI script uses COMPILER to overwrite CXX ifdef COMPILER COMPCXX=$(COMPILER) @@ -590,10 +587,7 @@ endif ### needs access to the optimization flags. 
ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(COMP),ndk) - CXXFLAGS += -flto=thin - LDFLAGS += $(CXXFLAGS) - else ifeq ($(comp),clang) + ifeq ($(comp),clang) CXXFLAGS += -flto=thin ifneq ($(findstring MINGW,$(KERNEL)),) CXXFLAGS += -fuse-ld=lld From df43805953b241f95c246ff3e96aece76b518590 Mon Sep 17 00:00:00 2001 From: GoldenRare Date: Thu, 10 Sep 2020 00:24:40 -0400 Subject: [PATCH 49/52] Added FEN string to bench output fixes https://github.com/official-stockfish/Stockfish/pull/3117 closes https://github.com/official-stockfish/Stockfish/pull/3118 No functional change --- AUTHORS | 1 + src/uci.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index c00ab657..198dfa5a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -63,6 +63,7 @@ Gary Heckman (gheckman) George Sobala (gsobala) gguliash Gian-Carlo Pascutto (gcp) +Deshawn Mohan-Smith (GoldenRare) Gontran Lemaire (gonlem) Goodkov Vasiliy Aleksandrovich (goodkov) Gregor Cramer diff --git a/src/uci.cpp b/src/uci.cpp index bc0ee0a0..3f3cc458 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -170,7 +170,7 @@ namespace { if (token == "go" || token == "eval") { - cerr << "\nPosition: " << cnt++ << '/' << num << endl; + cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl; if (token == "go") { go(pos, is, states); From 0ca93c5b94b820a41e2850ede084096120128a28 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Wed, 16 Sep 2020 19:14:32 +0200 Subject: [PATCH 50/52] Remove castling extension STC https://tests.stockfishchess.org/tests/view/5f5fa5348fbc1c8a3f476eca LLR: 2.94 (-2.94,2.94) {-1.25,0.25} Total: 38520 W: 4713 L: 4610 D: 29197 Ptnml(0-2): 233, 3486, 11734, 3559, 248 LTC https://tests.stockfishchess.org/tests/view/5f62166a912c15f19854b806 LLR: 2.93 (-2.94,2.94) {-0.75,0.25} Total: 48024 W: 2673 L: 2600 D: 42751 Ptnml(0-2): 64, 2247, 19316, 2322, 63 closes https://github.com/official-stockfish/Stockfish/pull/3128 bench: 3818400 --- src/search.cpp | 5 ----- 1 
file changed, 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c7d2efd4..17cd0a73 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1127,11 +1127,6 @@ moves_loop: // When in check, search starts from here && pos.non_pawn_material() <= 2 * RookValueMg) extension = 1; - // Castling extension - if ( type_of(move) == CASTLING - && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2) - extension = 1; - // Late irreversible move extension if ( move == ttMove && pos.rule50_count() > 80 From 64a63464d7bc72a3aac33aa680cd2b2b240ff903 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Wed, 16 Sep 2020 20:42:38 +0200 Subject: [PATCH 51/52] Simplify futility pruning for captures STC https://tests.stockfishchess.org/tests/view/5f61f0e4b91f2ec371e429c2 LLR: 2.94 (-2.94,2.94) {-1.25,0.25} Total: 75512 W: 8747 L: 8704 D: 58061 Ptnml(0-2): 440, 6589, 23683, 6576, 468 LTC https://tests.stockfishchess.org/tests/view/5f6215d3912c15f19854b801 LLR: 2.95 (-2.94,2.94) {-0.75,0.25} Total: 92912 W: 5030 L: 4992 D: 82890 Ptnml(0-2): 88, 4363, 37532, 4369, 104 closes https://github.com/official-stockfish/Stockfish/pull/3129 bench: 3856086 --- src/search.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 17cd0a73..9c5fb58b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1056,7 +1056,6 @@ moves_loop: // When in check, search starts from here if ( !givesCheck && lmrDepth < 6 && !(PvNode && abs(bestValue) < 2) - && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && !ss->inCheck && ss->staticEval + 169 + 244 * lmrDepth + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) From 8b8a510fd6a1a17b39b2d4b166f60ac7be0dab23 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Wed, 16 Sep 2020 17:39:11 +0200 Subject: [PATCH 52/52] Use tiling to speed up accumulator refreshes and updates Perform the update and 
refresh operations tile by tile in a local array of vectors. By selecting the array size carefully, we achieve that the compiler keeps the whole array in vector registers. Idea and original implementation by @sf-x. STC: https://tests.stockfishchess.org/tests/view/5f623eec912c15f19854b855 LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 4872 W: 623 L: 477 D: 3772 Ptnml(0-2): 14, 350, 1585, 450, 37 LTC: https://tests.stockfishchess.org/tests/view/5f62434e912c15f19854b860 LLR: 2.94 (-2.94,2.94) {0.25,1.25} Total: 25808 W: 1565 L: 1401 D: 22842 Ptnml(0-2): 23, 1186, 10332, 1330, 33 closes https://github.com/official-stockfish/Stockfish/pull/3130 No functional change --- src/nnue/nnue_feature_transformer.h | 233 +++++++++++++++------------- 1 file changed, 125 insertions(+), 108 deletions(-) diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 2b6259c3..e71ee60d 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -29,6 +29,56 @@ namespace Eval::NNUE { + // If vector instructions are enabled, we update and refresh the + // accumulator tile by tile such that each tile fits in the CPU's + // vector registers. 
+ #define TILING + + #ifdef USE_AVX512 + typedef __m512i vec_t; + #define vec_load(a) _mm512_loadA_si512(a) + #define vec_store(a,b) _mm512_storeA_si512(a,b) + #define vec_add_16(a,b) _mm512_add_epi16(a,b) + #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) + static constexpr IndexType kNumRegs = 8; // only 8 are needed + + #elif USE_AVX2 + typedef __m256i vec_t; + #define vec_load(a) _mm256_loadA_si256(a) + #define vec_store(a,b) _mm256_storeA_si256(a,b) + #define vec_add_16(a,b) _mm256_add_epi16(a,b) + #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) + static constexpr IndexType kNumRegs = 16; + + #elif USE_SSE2 + typedef __m128i vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_epi16(a,b) + #define vec_sub_16(a,b) _mm_sub_epi16(a,b) + static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8; + + #elif USE_MMX + typedef __m64 vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_pi16(a,b) + #define vec_sub_16(a,b) _mm_sub_pi16(a,b) + static constexpr IndexType kNumRegs = 8; + + #elif USE_NEON + typedef int16x8_t vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) vaddq_s16(a,b) + #define vec_sub_16(a,b) vsubq_s16(a,b) + static constexpr IndexType kNumRegs = 16; + + #else + #undef TILING + + #endif + // Input feature converter class FeatureTransformer { @@ -36,6 +86,11 @@ namespace Eval::NNUE { // Number of output dimensions for one side static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + #ifdef TILING + static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; + static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); + #endif + public: // Output type using OutputType = TransformedFeatureType; @@ -189,57 +244,41 @@ namespace Eval::NNUE { RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], active_indices); for (Color perspective : { WHITE, BLACK 
}) { + #ifdef TILING + for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; + + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + for (unsigned k = 0; k < kNumRegs; k++) + vec_store(&accTile[k], acc[k]); + } + #else std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); + kHalfDimensions * sizeof(BiasType)); + for (const auto index : active_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX512) - auto accumulation = reinterpret_cast<__m512i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - for (IndexType j = 0; j < kNumChunks; ++j) - _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j])); - #elif defined(USE_AVX2) - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j])); - - #elif defined(USE_SSE2) - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 
2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - - #elif defined(USE_MMX) - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - - #elif defined(USE_NEON) - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - - #else for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - #endif - } + #endif } + #if defined(USE_MMX) _mm_empty(); #endif @@ -257,29 +296,55 @@ namespace Eval::NNUE { bool reset[2]; RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], removed_indices, added_indices, reset); - for (Color perspective : { WHITE, BLACK }) { - #if defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); + #ifdef TILING + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { + for (Color perspective : { WHITE, BLACK }) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; - #elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); + if (reset[perspective]) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] 
= biasesTile[k]; + } else { + auto prevAccTile = reinterpret_cast( + &prev_accumulator.accumulation[perspective][i][j * kTileHeight]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_load(&prevAccTile[k]); - #elif defined(USE_MMX) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); - #elif defined(USE_NEON) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + } + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + } + + for (IndexType k = 0; k < kNumRegs; ++k) + vec_store(&accTile[k], acc[k]); + } + } + #if defined(USE_MMX) + _mm_empty(); #endif + #else + for (Color perspective : { WHITE, BLACK }) { + if (reset[perspective]) { std::memcpy(accumulator.accumulation[perspective][i], biases_, kHalfDimensions * sizeof(BiasType)); @@ -291,67 +356,19 @@ namespace Eval::NNUE { for (const auto index : removed_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); - - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for 
(IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); - - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]); - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = vsubq_s16(accumulation[j], column[j]); - - #else for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; - #endif - } } { // Difference calculation for the activated features for (const auto index : added_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - - #else for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - #endif - } } } - #if defined(USE_MMX) - _mm_empty(); #endif accumulator.computed_accumulation = true;