diff --git a/.travis.yml b/.travis.yml index 12596f1e..092c7f53 100644 --- a/.travis.yml +++ b/.travis.yml @@ -71,13 +71,15 @@ script: - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" && "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi + # workaround: exclude a custom version of llvm+clang, which doesn't find llvm-profdata on ubuntu + - if [[ "$TRAVIS_OS_NAME" != "linux" || "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi # compile only for some more advanced architectures (might not run in travis) - make clean && make -j2 ARCH=x86-64-avx2 build - make clean && make -j2 ARCH=x86-64-bmi2 build - # needs gcc 10 to compile - - if [[ "$COMPILER" != "g++-8" ]]; then make clean && make -j2 ARCH=x86-64-avx512 build; fi + - make clean && make -j2 ARCH=x86-64-avx512 build + - make clean && make -j2 ARCH=x86-64-vnni512 build + - make clean && make -j2 ARCH=x86-64-vnni256 build # # Check perft and reproducible search diff --git a/AUTHORS b/AUTHORS index d8f4d30e..198dfa5a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -36,10 +36,11 @@ Bryan Cross (crossbr) candirufish Chess13234 Chris Cain (ceebo) +Dale Weiler (graphitemaster) Dan Schmidt (dfannius) Daniel Axtens (daxtens) Daniel Dugovic (ddugovic) -Dariusz Orzechowski +Dariusz Orzechowski (dorzechowski) David Zar Daylen Yang (daylen) DiscanX @@ -59,8 +60,10 @@ Fauzi Akram Dabat (FauziAkram) Felix Wittmann gamander Gary Heckman (gheckman) +George Sobala (gsobala) gguliash Gian-Carlo Pascutto (gcp) +Deshawn Mohan-Smith (GoldenRare) Gontran Lemaire (gonlem) Goodkov Vasiliy Aleksandrovich (goodkov) Gregor Cramer diff --git a/README.md b/README.md index 54f61b52..348d77f8 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,17 @@ [![Build Status](https://ci.appveyor.com/api/projects/status/github/official-stockfish/Stockfish?branch=master&svg=true)](https://ci.appveyor.com/project/mcostalba/stockfish/branch/master) [Stockfish](https://stockfishchess.org) is a free, powerful UCI chess engine -derived from Glaurung 2.1. It features two evaluation functions, the classical -evaluation based on handcrafted terms, and the NNUE evaluation based on -efficiently updateable neural networks. The classical evaluation runs efficiently -on most 64bit CPU architectures, while the NNUE evaluation benefits strongly from the -vector intrinsics available on modern CPUs (avx2 or similar). +derived from Glaurung 2.1. Stockfish is not a complete chess program and requires a +UCI-compatible graphical user interface (GUI) (e.g. XBoard with PolyGlot, Scid, +Cute Chess, eboard, Arena, Sigma Chess, Shredder, Chess Partner or Fritz) in order +to be used comfortably. Read the documentation for your GUI of choice for information +about how to use Stockfish with it. -Stockfish is not a complete chess program and requires a -UCI-compatible GUI (e.g. XBoard with PolyGlot, Scid, Cute Chess, eboard, Arena, -Sigma Chess, Shredder, Chess Partner or Fritz) in order to be used comfortably. -Read the documentation for your GUI of choice for information about how to use -Stockfish with it. +The Stockfish engine features two evaluation functions for chess, the classical +evaluation based on handcrafted terms, and the NNUE evaluation based on efficiently +updateable neural networks. The classical evaluation runs efficiently on almost all +CPU architectures, while the NNUE evaluation benefits from the vector +intrinsics available on most CPUs (sse2, avx2, neon, or similar). ## Files @@ -28,10 +28,14 @@ This distribution of Stockfish consists of the following files: * src, a subdirectory containing the full source code, including a Makefile that can be used to compile Stockfish on Unix-like systems. -To use the NNUE evaluation an additional data file with neural network parameters -needs to be downloaded. The filename for the default set can be found as the default -value of the `EvalFile` UCI option, with the format -`nn-[SHA256 first 12 digits].nnue` (e.g. nn-c157e0a5755b.nnue). This file can be downloaded from + * a file with the .nnue extension, storing the neural network for the NNUE + evaluation. Binary distributions will have this file embedded. + +Note: to use the NNUE evaluation, the additional data file with neural network parameters +needs to be available. Normally, this file is already embedded in the binary or it can be downloaded. +The filename for the default (recommended) net can be found as the default +value of the `EvalFile` UCI option, with the format `nn-[SHA256 first 12 digits].nnue` +(for instance, `nn-c157e0a5755b.nnue`). This file can be downloaded from ``` https://tests.stockfishchess.org/api/nn/[filename] ``` @@ -58,19 +62,14 @@ Currently, Stockfish has the following UCI options: * #### Use NNUE Toggle between the NNUE and classical evaluation functions. If set to "true", - the network parameters must be available to load from file (see also EvalFile). + the network parameters must be available to load from file (see also EvalFile), + if they are not embedded in the binary. * #### EvalFile The name of the file of the NNUE evaluation parameters. Depending on the GUI the - filename should include the full path to the folder/directory that contains the file. - - * #### Contempt - A positive value for contempt favors middle game positions and avoids draws, - effective for the classical evaluation only. - - * #### Analysis Contempt - By default, contempt is set to prefer the side to move. Set this option to "White" - or "Black" to analyse with contempt for that side, or "Off" to disable contempt. + filename might have to include the full path to the folder/directory that contains the file. + Other locations, such as the directory that contains the binary and the working directory, + are also searched. * #### UCI_AnalyseMode An option handled by your GUI. @@ -120,6 +119,14 @@ Currently, Stockfish has the following UCI options: Limit Syzygy tablebase probing to positions with at most this many pieces left (including kings and pawns). + * #### Contempt + A positive value for contempt favors middle game positions and avoids draws, + effective for the classical evaluation only. + + * #### Analysis Contempt + By default, contempt is set to prefer the side to move. Set this option to "White" + or "Black" to analyse with contempt for that side, or "Off" to disable contempt. + * #### Move Overhead Assume a time delay of x ms due to network and GUI overheads. This is useful to avoid losses on time in those cases. @@ -138,7 +145,7 @@ Currently, Stockfish has the following UCI options: * #### Debug Log File Write all communication to and from the engine into a text file. -## Classical and NNUE evaluation +## A note on classical and NNUE evaluation Both approaches assign a value to a position that is used in alpha-beta (PVS) search to find the best move. The classical evaluation computes this value as a function @@ -255,6 +262,7 @@ targets with corresponding descriptions. cd src make help make build ARCH=x86-64-modern + make net ``` When not using the Makefile to compile (for instance with Microsoft MSVC) you @@ -266,8 +274,7 @@ compiler you used to create your executable. These informations can be found by typing the following commands in a console: ``` - ./stockfish - compiler + ./stockfish compiler ``` ## Understanding the code base and participating in the project diff --git a/Top CPU Contributors.txt b/Top CPU Contributors.txt index 0ea5ac72..482e9000 100644 --- a/Top CPU Contributors.txt +++ b/Top CPU Contributors.txt @@ -1,154 +1,173 @@ -Contributors with >10,000 CPU hours as of January 7, 2020 +Contributors with >10,000 CPU hours as of Sept 2, 2020 Thank you! Username CPU Hours Games played -------------------------------------------------- -noobpwnftw 9305707 695548021 -mlang 780050 61648867 -dew 621626 43921547 -mibere 524702 42238645 -crunchy 354587 27344275 -cw 354495 27274181 -fastgm 332801 22804359 -JojoM 295750 20437451 -CSU_Dynasty 262015 21828122 -Fisherman 232181 18939229 -ctoks 218866 17622052 -glinscott 201989 13780820 -tvijlbrief 201204 15337115 -velislav 188630 14348485 -gvreuls 187164 15149976 -bking_US 180289 11876016 -nordlandia 172076 13467830 -leszek 157152 11443978 -Thanar 148021 12365359 -spams 141975 10319326 -drabel 138073 11121749 -vdv 137850 9394330 -mgrabiak 133578 10454324 -TueRens 132485 10878471 -bcross 129683 11557084 -marrco 126078 9356740 -sqrt2 125830 9724586 -robal 122873 9593418 -vdbergh 120766 8926915 -malala 115926 8002293 -CoffeeOne 114241 5004100 -dsmith 113189 7570238 -BrunoBanani 104644 7436849 -Data 92328 8220352 -mhoram 89333 6695109 -davar 87924 7009424 -xoto 81094 6869316 -ElbertoOne 80899 7023771 -grandphish2 78067 6160199 -brabos 77212 6186135 -psk 75733 5984901 -BRAVONE 73875 5054681 -sunu 70771 5597972 -sterni1971 70605 5590573 -MaZePallas 66886 5188978 -Vizvezdenec 63708 4967313 -nssy 63462 5259388 -jromang 61634 4940891 -teddybaer 61231 5407666 -Pking_cda 60099 5293873 -solarlight 57469 5028306 -dv8silencer 56913 3883992 -tinker 54936 4086118 -renouve 49732 3501516 -Freja 49543 3733019 -robnjr 46972 4053117 -rap 46563 3219146 -Bobo1239 46036 3817196 -ttruscott 45304 3649765 -racerschmacer 44881 3975413 -finfish 44764 3370515 -eva42 41783 3599691 -biffhero 40263 3111352 -bigpen0r 39817 3291647 -mhunt 38871 2691355 -ronaldjerum 38820 3240695 -Antihistamine 38785 2761312 -pb00067 38038 3086320 -speedycpu 37591 3003273 -rkl 37207 3289580 -VoyagerOne 37050 3441673 -jbwiebe 35320 2805433 -cuistot 34191 2146279 -homyur 33927 2850481 -manap 32873 2327384 -gri 32538 2515779 -oryx 31267 2899051 -EthanOConnor 30959 2090311 -SC 30832 2730764 -csnodgrass 29505 2688994 -jmdana 29458 2205261 -strelock 28219 2067805 -jkiiski 27832 1904470 -Pyafue 27533 1902349 -Garf 27515 2747562 -eastorwest 27421 2317535 -slakovv 26903 2021889 -Prcuvu 24835 2170122 -anst 24714 2190091 -hyperbolic.tom 24319 2017394 -Patrick_G 23687 1801617 -Sharaf_DG 22896 1786697 -nabildanial 22195 1519409 -chriswk 21931 1868317 -achambord 21665 1767323 -Zirie 20887 1472937 -team-oh 20217 1636708 -Isidor 20096 1680691 -ncfish1 19931 1520927 -nesoneg 19875 1463031 -Spprtr 19853 1548165 -JanErik 19849 1703875 -agg177 19478 1395014 -SFTUser 19231 1567999 -xor12 19017 1680165 -sg4032 18431 1641865 -rstoesser 18118 1293588 -MazeOfGalious 17917 1629593 -j3corre 17743 941444 -cisco2015 17725 1690126 -ianh2105 17706 1632562 -dex 17678 1467203 -jundery 17194 1115855 -iisiraider 17019 1101015 -horst.prack 17012 1465656 -Adrian.Schmidt123 16563 1281436 -purplefishies 16342 1092533 -wei 16274 1745989 -ville 16144 1384026 -eudhan 15712 1283717 -OuaisBla 15581 972000 -DragonLord 15559 1162790 -dju 14716 875569 -chris 14479 1487385 -0xB00B1ES 14079 1001120 -OssumOpossum 13776 1007129 -enedene 13460 905279 -bpfliegel 13346 884523 -Ente 13198 1156722 -IgorLeMasson 13087 1147232 -jpulman 13000 870599 -ako027ako 12775 1173203 -Nikolay.IT 12352 1068349 -Andrew Grant 12327 895539 -joster 12008 950160 -AdrianSA 11996 804972 -Nesa92 11455 1111993 -fatmurphy 11345 853210 -Dark_wizzie 11108 1007152 -modolief 10869 896470 -mschmidt 10757 803401 -infinity 10594 727027 -mabichito 10524 749391 -Thomas A. Anderson 10474 732094 -thijsk 10431 719357 -Flopzee 10339 894821 -crocogoat 10104 1013854 -SapphireBrand 10104 969604 -stocky 10017 699440 +noobpwnftw 19352969 1231459677 +mlang 957168 61657446 +dew 949885 56893432 +mibere 703817 46865007 +crunchy 427035 27344275 +cw 416006 27521077 +JojoM 415904 24479564 +fastgm 404873 23953472 +CSU_Dynasty 335774 22850550 +tvijlbrief 335199 21871270 +Fisherman 325053 21786603 +gvreuls 311480 20751516 +ctoks 275877 18710423 +velislav 241267 15596372 +glinscott 217799 13780820 +nordlandia 211692 13484886 +bcross 206213 14934233 +bking_US 198894 11876016 +leszek 189170 11446821 +mgrabiak 183896 11778092 +drabel 181408 12489478 +TueRens 181349 12192000 +Thanar 179852 12365359 +vdv 175171 9881246 +robal 166948 10702862 +spams 157128 10319326 +marrco 149947 9376421 +sqrt2 147963 9724586 +vdbergh 137041 8926915 +CoffeeOne 136294 5004100 +malala 136182 8002293 +mhoram 128934 8177193 +davar 122092 7960001 +dsmith 122059 7570238 +xoto 119696 8222144 +grandphish2 116481 7582197 +Data 113305 8220352 +BrunoBanani 112960 7436849 +ElbertoOne 99028 7023771 +MaZePallas 98571 6362619 +brabos 92118 6186135 +psk 89957 5984901 +sunu 88463 6007033 +sterni1971 86948 5613788 +Vizvezdenec 83752 5343724 +BRAVONE 81239 5054681 +nssy 76497 5259388 +teddybaer 75125 5407666 +Pking_cda 73776 5293873 +jromang 70695 4940891 +solarlight 70517 5028306 +dv8silencer 70287 3883992 +Bobo1239 68515 4652287 +racerschmacer 67468 4935996 +manap 66273 4121774 +tinker 63458 4213726 +linrock 59082 4516053 +robnjr 57262 4053117 +Freja 56938 3733019 +ttruscott 56005 3679485 +renouve 53811 3501516 +cuistot 52532 3014920 +finfish 51360 3370515 +eva42 51272 3599691 +rkl 50759 3840947 +rap 49985 3219146 +pb00067 49727 3298270 +ronaldjerum 47654 3240695 +bigpen0r 47278 3291647 +biffhero 46564 3111352 +VoyagerOne 45386 3445881 +speedycpu 43842 3003273 +jbwiebe 43305 2805433 +Antihistamine 41788 2761312 +mhunt 41735 2691355 +eastorwest 40387 2812173 +homyur 39893 2850481 +gri 39871 2515779 +oryx 38228 2941656 +0x3C33 37773 2529097 +SC 37290 2731014 +csnodgrass 36207 2688994 +jmdana 36108 2205261 +strelock 34716 2074055 +Garf 33800 2747562 +EthanOConnor 33370 2090311 +slakovv 32915 2021889 +Spprtr 32591 2139601 +Prcuvu 30377 2170122 +anst 30301 2190091 +jkiiski 30136 1904470 +hyperbolic.tom 29840 2017394 +Pyafue 29650 1902349 +OuaisBla 27629 1578000 +chriswk 26902 1868317 +achambord 26582 1767323 +Patrick_G 26276 1801617 +yorkman 26193 1992080 +SFTUser 25182 1675689 +nabildanial 24942 1519409 +Sharaf_DG 24765 1786697 +ncfish1 24411 1520927 +agg177 23890 1395014 +JanErik 23408 1703875 +Isidor 23388 1680691 +Norabor 22976 1587862 +cisco2015 22880 1759669 +Zirie 22542 1472937 +team-oh 22272 1636708 +MazeOfGalious 21978 1629593 +sg4032 21945 1643065 +ianh2105 21725 1632562 +xor12 21628 1680365 +dex 21612 1467203 +nesoneg 21494 1463031 +horst.prack 20878 1465656 +0xB00B1ES 20590 1208666 +j3corre 20405 941444 +Adrian.Schmidt123 20316 1281436 +wei 19973 1745989 +rstoesser 19569 1293588 +eudhan 19274 1283717 +Ente 19070 1373058 +jundery 18445 1115855 +iisiraider 18247 1101015 +ville 17883 1384026 +chris 17698 1487385 +purplefishies 17595 1092533 +DragonLord 17014 1162790 +dju 16515 929427 +IgorLeMasson 16064 1147232 +ako027ako 15671 1173203 +Nikolay.IT 15154 1068349 +Andrew Grant 15114 895539 +yurikvelo 15027 1165616 +OssumOpossum 14857 1007129 +enedene 14476 905279 +bpfliegel 14298 884523 +jpulman 13982 870599 +joster 13794 950160 +Nesa92 13786 1114691 +Dark_wizzie 13422 1007152 +Hjax 13350 900887 +Fifis 13313 965473 +mabichito 12903 749391 +thijsk 12886 722107 +crocogoat 12876 1048802 +AdrianSA 12860 804972 +Flopzee 12698 894821 +fatmurphy 12547 853210 +SapphireBrand 12416 969604 +modolief 12386 896470 +scuzzi 12362 833465 +pgontarz 12151 848794 +stocky 11954 699440 +mschmidt 11941 803401 +infinity 11470 727027 +torbjo 11387 728873 +Thomas A. Anderson 11372 732094 +snicolet 11106 869170 +amicic 10779 733593 +rpngn 10712 688203 +d64 10680 771144 +basepi 10637 744851 +jjoshua2 10559 670905 +dzjp 10343 732529 +ols 10259 570669 +lbraesch 10252 647825 diff --git a/appveyor.yml b/appveyor.yml index a3732a23..ab608409 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -63,7 +63,7 @@ build_script: - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal - ps: | # Download default NNUE net from fishtest - $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" + $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" $dummy = $nnuenet -match "(?nn-[a-z0-9]{12}.nnue)" $nnuenet = $Matches.nnuenet Write-Host "Default net:" $nnuenet diff --git a/src/Makefile b/src/Makefile index a8ef55e3..552cbcb4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -75,7 +75,8 @@ endif # sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 # avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 -# vnni = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 +# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256 +# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # mpi = yes/no --- -DUSE_MPI --- Use Message Passing Interface # @@ -86,7 +87,19 @@ endif ### 2.1. General and architecture defaults ifeq ($(ARCH),) - empty_arch = yes + ARCH = x86-64-modern + help_skip_sanity = yes +endif +# explicitly check for the list of supported architectures (as listed with make help), +# the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true` +ifeq ($(ARCH), $(filter $(ARCH), \ + x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ + x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ + armv7 armv7-neon armv8 apple-silicon general-64 general-32)) + SUPPORTED_ARCH=true +else + SUPPORTED_ARCH=false endif optimize = yes @@ -103,10 +116,10 @@ ssse3 = no sse41 = no avx2 = no avx512 = no -vnni = no +vnni256 = no +vnni512 = no neon = no mpi = no -ARCH = x86-64-modern STRIP = strip ### 2.2 Architecture specific @@ -194,7 +207,18 @@ ifeq ($(findstring -avx512,$(ARCH)),-avx512) avx512 = yes endif -ifeq ($(findstring -vnni,$(ARCH)),-vnni) +ifeq ($(findstring -vnni256,$(ARCH)),-vnni256) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes + vnni256 = yes +endif + +ifeq ($(findstring -vnni512,$(ARCH)),-vnni512) popcnt = yes sse = yes sse2 = yes @@ -203,7 +227,7 @@ ifeq ($(findstring -vnni,$(ARCH)),-vnni) avx2 = yes pext = yes avx512 = yes - vnni = yes + vnni512 = yes endif ifeq ($(sse),yes) @@ -243,7 +267,7 @@ ifeq ($(ARCH),armv7-neon) endif ifeq ($(ARCH),armv8) - arch = armv8-a + arch = armv8 prefetch = yes popcnt = yes neon = yes @@ -287,7 +311,7 @@ ifeq ($(COMP),gcc) CXX=g++ CXXFLAGS += -pedantic -Wextra -Wshadow - ifeq ($(arch),$(filter $(arch),armv7 armv8-a)) + ifeq ($(arch),$(filter $(arch),armv7 armv8)) ifeq ($(OS),Android) CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) @@ -304,9 +328,6 @@ ifeq ($(COMP),gcc) ifneq ($(KERNEL),Darwin) LDFLAGS += -Wl,--no-as-needed endif - - gccversion = $(shell $(CXX) --version) - gccisclang = $(findstring clang,$(gccversion)) endif ifeq ($(COMP),mingw) @@ -362,22 +383,10 @@ ifeq ($(COMP),clang) endif endif -ifeq ($(comp),icc) - profile_make = icc-profile-make - profile_use = icc-profile-use -else -ifeq ($(comp),clang) - profile_make = clang-profile-make - profile_use = clang-profile-use -else - profile_make = gcc-profile-make - profile_use = gcc-profile-use -endif -endif - ifeq ($(KERNEL),Darwin) CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 + XCRUN = xcrun endif # To cross-compile for Android, NDK version r21 or later is recommended. @@ -385,20 +394,30 @@ endif # Currently we don't know how to make PGO builds with the NDK yet. ifeq ($(COMP),ndk) CXXFLAGS += -stdlib=libc++ -fPIE + comp=clang ifeq ($(arch),armv7) - comp=armv7a-linux-androideabi16-clang CXX=armv7a-linux-androideabi16-clang++ CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon STRIP=arm-linux-androideabi-strip endif - ifeq ($(arch),armv8-a) - comp=aarch64-linux-android21-clang + ifeq ($(arch),armv8) CXX=aarch64-linux-android21-clang++ STRIP=aarch64-linux-android-strip endif LDFLAGS += -static-libstdc++ -pie -lm -latomic endif +ifeq ($(comp),icc) + profile_make = icc-profile-make + profile_use = icc-profile-use +else ifeq ($(comp),clang) + profile_make = clang-profile-make + profile_use = clang-profile-use +else + profile_make = gcc-profile-make + profile_use = gcc-profile-use +endif + ### Travis CI script uses COMPILER to overwrite CXX ifdef COMPILER COMPCXX=$(COMPILER) @@ -409,8 +428,19 @@ ifdef COMPCXX CXX=$(COMPCXX) endif +### Sometimes gcc is really clang +ifeq ($(COMP),gcc) + gccversion = $(shell $(CXX) --version) + gccisclang = $(findstring clang,$(gccversion)) + ifneq ($(gccisclang),) + profile_make = clang-profile-make + profile_use = clang-profile-use + endif +endif + ### On mingw use Windows threads, otherwise POSIX ifneq ($(comp),mingw) + CXXFLAGS += -DUSE_PTHREADS # On Android Bionic's C library comes with its own pthread implementation bundled in ifneq ($(OS),Android) # Haiku has pthreads in its libroot, so only link it in on other platforms @@ -469,7 +499,7 @@ endif ### 3.6 popcnt ifeq ($(popcnt),yes) - ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8-a arm64)) + ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) CXXFLAGS += -DUSE_POPCNT else ifeq ($(comp),icc) CXXFLAGS += -msse3 -DUSE_POPCNT @@ -493,7 +523,14 @@ ifeq ($(avx512),yes) endif endif -ifeq ($(vnni),yes) +ifeq ($(vnni256),yes) + CXXFLAGS += -DUSE_VNNI + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256 + endif +endif + +ifeq ($(vnni512),yes) CXXFLAGS += -DUSE_VNNI ifeq ($(comp),$(filter $(comp),gcc clang mingw)) CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl @@ -532,9 +569,11 @@ ifeq ($(neon),yes) CXXFLAGS += -DUSE_NEON ifeq ($(KERNEL),Linux) ifneq ($(COMP),ndk) + ifneq ($(arch),armv8) CXXFLAGS += -mfpu=neon endif endif + endif endif ### 3.7 pext @@ -550,11 +589,13 @@ endif ### needs access to the optimization flags. ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(COMP),ndk) - CXXFLAGS += -flto=thin - LDFLAGS += $(CXXFLAGS) - else ifeq ($(comp),clang) + ifeq ($(comp),clang) CXXFLAGS += -flto=thin + ifneq ($(findstring MINGW,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + else ifneq ($(findstring MSYS,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + endif LDFLAGS += $(CXXFLAGS) # GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be @@ -578,10 +619,12 @@ ifeq ($(debug), no) # So, only enable it for a cross from Linux by default. else ifeq ($(comp),mingw) ifeq ($(KERNEL),Linux) + ifneq ($(arch),i386) CXXFLAGS += -flto LDFLAGS += $(CXXFLAGS) -flto=jobserver endif endif + endif endif endif @@ -603,6 +646,7 @@ endif ### Section 4. Public Targets ### ========================================================================== + help: @echo "" @echo "To compile stockfish, type: " @@ -621,7 +665,8 @@ help: @echo "" @echo "Supported archs:" @echo "" - @echo "x86-64-vnni > x86 64-bit with vnni support" + @echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide" + @echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide" @echo "x86-64-avx512 > x86 64-bit with avx512 support" @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support" @@ -661,10 +706,13 @@ help: @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0" @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" -ifneq ($(empty_arch), yes) @echo "-------------------------------" +ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true) @echo "The selected architecture $(ARCH) will enable the following configuration: " @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity +else + @echo "Specify a supported architecture with the ARCH option for more details" + @echo "" endif @@ -672,7 +720,7 @@ endif config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \ clang-profile-use clang-profile-make -build: config-sanity +build: config-sanity net $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all profile-build: net config-sanity objclean profileclean @@ -698,12 +746,13 @@ install: -cp $(EXE) $(BINDIR) -strip $(BINDIR)/$(EXE) -#clean all +# clean all clean: objclean profileclean @rm -f .depend *~ core +# evaluation network (nnue) net: - $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) + $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) @echo "Default net: $(nnuenet)" $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) @@ -725,7 +774,6 @@ net: echo "shasum / sha256sum not found, skipping net validation"; \ fi - # clean binaries and objects objclean: @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o @@ -765,7 +813,8 @@ config-sanity: @echo "sse41: '$(sse41)'" @echo "avx2: '$(avx2)'" @echo "avx512: '$(avx512)'" - @echo "vnni: '$(vnni)'" + @echo "vnni256: '$(vnni256)'" + @echo "vnni512: '$(vnni512)'" @echo "neon: '$(neon)'" @echo "mpi: '$(mpi)'" @echo "" @@ -779,9 +828,10 @@ config-sanity: @test "$(debug)" = "yes" || test "$(debug)" = "no" @test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "address" || test "$(sanitize)" = "no" @test "$(optimize)" = "yes" || test "$(optimize)" = "no" + @test "$(SUPPORTED_ARCH)" = "true" @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \ - test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64" + test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" @test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" @@ -793,7 +843,8 @@ config-sanity: @test "$(sse41)" = "yes" || test "$(sse41)" = "no" @test "$(avx2)" = "yes" || test "$(avx2)" = "no" @test "$(avx512)" = "yes" || test "$(avx512)" = "no" - @test "$(vnni)" = "yes" || test "$(vnni)" = "no" + @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" + @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no" @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ || test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" @@ -808,7 +859,7 @@ clang-profile-make: all clang-profile-use: - llvm-profdata merge -output=stockfish.profdata *.profraw + $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ EXTRALDFLAGS='-fprofile-use ' \ diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 806e9840..ffb631a2 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -164,5 +164,7 @@ vector setup_bench(const Position& current, istream& is) { ++posCounter; } + list.emplace_back("setoption name Use NNUE value true"); + return list; } diff --git a/src/bitboard.cpp b/src/bitboard.cpp index f531010c..80206b58 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -39,6 +39,16 @@ namespace { Bitboard BishopTable[0x1480]; // To store bishop attacks void init_magics(PieceType pt, Bitboard table[], Magic magics[]); + +} + + +/// safe_destination() returns the bitboard of target square for the given step +/// from the given square. If the step is off the board, returns empty bitboard. + +inline Bitboard safe_destination(Square s, int step) { + Square to = Square(s + step); + return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0); } @@ -110,7 +120,7 @@ namespace { Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST}; - for(Direction d : (pt == ROOK ? RookDirections : BishopDirections)) + for (Direction d : (pt == ROOK ? RookDirections : BishopDirections)) { Square s = sq; while(safe_destination(s, d) && !(occupied & s)) diff --git a/src/bitboard.h b/src/bitboard.h index a899d879..29d8f66d 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -279,16 +279,6 @@ inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); } inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); } -/// safe_destination() returns the bitboard of target square for the given step -/// from the given square. If the step is off the board, returns empty bitboard. - -inline Bitboard safe_destination(Square s, int step) -{ - Square to = Square(s + step); - return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0); -} - - /// attacks_bb(Square) returns the pseudo attacks of the give piece type /// assuming an empty board. diff --git a/src/evaluate.cpp b/src/evaluate.cpp index c08144de..694a15ea 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -20,53 +20,129 @@ #include #include #include // For std::memset +#include #include #include #include +#include +#include #include "bitboard.h" #include "cluster.h" #include "evaluate.h" #include "material.h" +#include "misc.h" #include "pawns.h" #include "thread.h" #include "uci.h" +#include "incbin/incbin.h" + + +// Macro to embed the default NNUE file data in the engine binary (using incbin.h, by Dale Weiler). +// This macro invocation will declare the following three variables +// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data +// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end +// const unsigned int gEmbeddedNNUESize; // the size of the embedded file +// Note that this does not work in Microsof Visual Studio. +#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) + INCBIN(EmbeddedNNUE, EvalFileDefaultName); +#else + const unsigned char gEmbeddedNNUEData[1] = {0x0}; + const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1]; + const unsigned int gEmbeddedNNUESize = 1; +#endif + + +using namespace std; +using namespace Eval::NNUE; namespace Eval { bool useNNUE; - std::string eval_file_loaded="None"; + string eval_file_loaded = "None"; + + /// init_NNUE() tries to load a nnue network at startup time, or when the engine + /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" + /// The name of the nnue network is always retrieved from the EvalFile option. + /// We search the given network in three locations: internally (the default + /// network may be embedded in the binary), in the active working directory and + /// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY + /// variable to have the engine search in a special directory in their distro. void init_NNUE() { useNNUE = Options["Use NNUE"]; - std::string eval_file = std::string(Options["EvalFile"]); - if (useNNUE && eval_file_loaded != eval_file) - if (Eval::NNUE::load_eval_file(eval_file)) - eval_file_loaded = eval_file; + if (!useNNUE) + return; + + string eval_file = string(Options["EvalFile"]); + + #if defined(DEFAULT_NNUE_DIRECTORY) + #define stringify2(x) #x + #define stringify(x) stringify2(x) + vector dirs = { "" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; + #else + vector dirs = { "" , "" , CommandLine::binaryDirectory }; + #endif + + for (string directory : dirs) + if (eval_file_loaded != eval_file) + { + if (directory != "") + { + ifstream stream(directory + eval_file, ios::binary); + if (load_eval(eval_file, stream)) + eval_file_loaded = eval_file; + } + + if (directory == "" && eval_file == EvalFileDefaultName) + { + // C++ way to prepare a buffer for a memory stream + class MemoryBuffer : public basic_streambuf { + public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); } + }; + + MemoryBuffer buffer(const_cast(reinterpret_cast(gEmbeddedNNUEData)), + size_t(gEmbeddedNNUESize)); + + istream stream(&buffer); + if (load_eval(eval_file, stream)) + eval_file_loaded = eval_file; + } + } } + /// verify_NNUE() verifies that the last net used was loaded successfully void verify_NNUE() { - std::string eval_file = std::string(Options["EvalFile"]); + string eval_file = string(Options["EvalFile"]); + if (useNNUE && eval_file_loaded != eval_file) { UCI::OptionsMap defaults; UCI::init(defaults); - std::cerr << "NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully. " - << "These network evaluation parameters must be available, and compatible with this version of the code. " - << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. " - << "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << std::endl; - std::exit(EXIT_FAILURE); + string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; + string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; + string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; + string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]); + string msg5 = "The engine will be terminated now."; + + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg4 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; + + exit(EXIT_FAILURE); } if (Cluster::is_root()) { if (useNNUE) - sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl; + sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl; else - sync_cout << "info string classical evaluation enabled." << sync_endl; + sync_cout << "info string classical evaluation enabled" << sync_endl; } } } @@ -155,26 +231,26 @@ namespace { // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a // pawn protected square on rank 4 to 6 which is also safe from a pawn attack. - constexpr Score Outpost[] = { S(56, 36), S(30, 23) }; + constexpr Score Outpost[] = { S(56, 34), S(31, 23) }; // PassedRank[Rank] contains a bonus according to the rank of a passed pawn constexpr Score PassedRank[RANK_NB] = { - S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260) + S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260) }; // RookOnFile[semiopen/open] contains bonuses for each rook when there is // no (friendly) pawn on the rook file. - constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) }; + constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) }; // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to // which piece type attacks which one. Attacks on lesser pieces which are // pawn-defended are not considered. constexpr Score ThreatByMinor[PIECE_TYPE_NB] = { - S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161) + S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162) }; constexpr Score ThreatByRook[PIECE_TYPE_NB] = { - S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41) + S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43) }; // Assorted bonuses and penalties @@ -291,8 +367,8 @@ namespace { attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]); // Init our king safety tables - Square s = make_square(Utility::clamp(file_of(ksq), FILE_B, FILE_G), - Utility::clamp(rank_of(ksq), RANK_2, RANK_7)); + Square s = make_square(std::clamp(file_of(ksq), FILE_B, FILE_G), + std::clamp(rank_of(ksq), RANK_2, RANK_7)); kingRing[Us] = attacks_bb(s) | s; kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them)); @@ -689,8 +765,8 @@ namespace { Square blockSq = s + Up; // Adjust bonus based on the king's proximity - bonus += make_score(0, ( (king_proximity(Them, blockSq) * 19) / 4 - - king_proximity(Us, blockSq) * 2) * w); + bonus += make_score(0, ( king_proximity(Them, blockSq) * 19 / 4 + - king_proximity(Us, blockSq) * 2) * w); // If blockSq is not the queening square then consider also a second push if (r != RANK_7) @@ -734,7 +810,7 @@ namespace { // Evaluation::space() computes a space evaluation for a given side, aiming to improve game - // play in the opening. It is based on the number of safe squares on the 4 central files + // play in the opening. It is based on the number of safe squares on the four central files // on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice. // Finally, the space bonus is multiplied by a weight which decreases according to occupancy. @@ -807,7 +883,7 @@ namespace { // Now apply the bonus: note that we find the attacking side by extracting the // sign of the midgame or endgame values, and that we carefully cap the bonus // so that the midgame and endgame scores do not change sign after the bonus. - int u = ((mg > 0) - (mg < 0)) * Utility::clamp(complexity + 50, -abs(mg), 0); + int u = ((mg > 0) - (mg < 0)) * std::clamp(complexity + 50, -abs(mg), 0); int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg)); mg += u; @@ -943,19 +1019,26 @@ make_v: Value Eval::evaluate(const Position& pos) { + // Use classical eval if there is a large imbalance + // If there is a moderate imbalance, use classical eval with probability (1/8), + // as derived from the node counter. + bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); bool classical = !Eval::useNNUE - || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); + || useClassical + || (abs(eg_value(pos.psq_score())) > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); Value v = classical ? Evaluation(pos).value() : NNUE::evaluate(pos) * 5 / 4 + Tempo; - if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) + if ( useClassical + && Eval::useNNUE + && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) v = NNUE::evaluate(pos) * 5 / 4 + Tempo; // Damp down the evaluation linearly when shuffling v = v * (100 - pos.rule50_count()) / 100; - // Guarantee evalution outside of TB range - v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); + // Guarantee evaluation does not hit the tablebase range + v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); return v; } @@ -975,42 +1058,46 @@ std::string Eval::trace(const Position& pos) { Value v; - if (Eval::useNNUE) - { - v = NNUE::evaluate(pos); - } - else - { - std::memset(scores, 0, sizeof(scores)); + std::memset(scores, 0, sizeof(scores)); - pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt + pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt - v = Evaluation(pos).value(); + v = Evaluation(pos).value(); - ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) - << " Term | White | Black | Total \n" - << " | MG EG | MG EG | MG EG \n" - << " ------------+-------------+-------------+------------\n" - << " Material | " << Term(MATERIAL) - << " Imbalance | " << Term(IMBALANCE) - << " Pawns | " << Term(PAWN) - << " Knights | " << Term(KNIGHT) - << " Bishops | " << Term(BISHOP) - << " Rooks | " << Term(ROOK) - << " Queens | " << Term(QUEEN) - << " Mobility | " << Term(MOBILITY) - << " King safety | " << Term(KING) - << " Threats | " << Term(THREAT) - << " Passed | " << Term(PASSED) - << " Space | " << Term(SPACE) - << " Winnable | " << Term(WINNABLE) - << " ------------+-------------+-------------+------------\n" - << " Total | " << Term(TOTAL); - } + ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) + << " Term | White | Black | Total \n" + << " | MG EG | MG EG | MG EG \n" + << " ------------+-------------+-------------+------------\n" + << " Material | " << Term(MATERIAL) + << " Imbalance | " << Term(IMBALANCE) + << " Pawns | " << Term(PAWN) + << " Knights | " << Term(KNIGHT) + << " Bishops | " << Term(BISHOP) + << " Rooks | " << Term(ROOK) + << " Queens | " << Term(QUEEN) + << " Mobility | " << Term(MOBILITY) + << " King safety | " << Term(KING) + << " Threats | " << Term(THREAT) + << " Passed | " << Term(PASSED) + << " Space | " << Term(SPACE) + << " Winnable | " << Term(WINNABLE) + << " ------------+-------------+-------------+------------\n" + << " Total | " << Term(TOTAL); v = pos.side_to_move() == WHITE ? v : -v; - ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; + ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n"; + + if (Eval::useNNUE) + { + v = NNUE::evaluate(pos); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "\nNNUE evaluation: " << to_cp(v) << " (white side)\n"; + } + + v = evaluate(pos); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; return ss.str(); } diff --git a/src/evaluate.h b/src/evaluate.h index e808068d..c723bd8f 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -35,12 +35,17 @@ namespace Eval { void init_NNUE(); void verify_NNUE(); + // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue + // for the build process (profile-build and fishtest) to work. Do not change the + // name of the macro, as it is used in the Makefile. + #define EvalFileDefaultName "nn-03744f8d56d8.nnue" + namespace NNUE { Value evaluate(const Position& pos); Value compute_eval(const Position& pos); void update_eval(const Position& pos); - bool load_eval_file(const std::string& evalFile); + bool load_eval(std::string streamName, std::istream& stream); } // namespace NNUE diff --git a/src/incbin/UNLICENCE b/src/incbin/UNLICENCE new file mode 100644 index 00000000..32484ab5 --- /dev/null +++ b/src/incbin/UNLICENCE @@ -0,0 +1,26 @@ +The file "incbin.h" is free and unencumbered software released into +the public domain by Dale Weiler, see: + + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h new file mode 100755 index 00000000..c19684d7 --- /dev/null +++ b/src/incbin/incbin.h @@ -0,0 +1,368 @@ +/** + * @file incbin.h + * @author Dale Weiler + * @brief Utility for including binary files + * + * Facilities for including binary files into the current translation unit and + * making use from them externally in other translation units. + */ +#ifndef INCBIN_HDR +#define INCBIN_HDR +#include +#if defined(__AVX512BW__) || \ + defined(__AVX512CD__) || \ + defined(__AVX512DQ__) || \ + defined(__AVX512ER__) || \ + defined(__AVX512PF__) || \ + defined(__AVX512VL__) || \ + defined(__AVX512F__) +# define INCBIN_ALIGNMENT_INDEX 6 +#elif defined(__AVX__) || \ + defined(__AVX2__) +# define INCBIN_ALIGNMENT_INDEX 5 +#elif defined(__SSE__) || \ + defined(__SSE2__) || \ + defined(__SSE3__) || \ + defined(__SSSE3__) || \ + defined(__SSE4_1__) || \ + defined(__SSE4_2__) || \ + defined(__neon__) +# define INCBIN_ALIGNMENT_INDEX 4 +#elif ULONG_MAX != 0xffffffffu +# define INCBIN_ALIGNMENT_INDEX 3 +# else +# define INCBIN_ALIGNMENT_INDEX 2 +#endif + +/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ +#define INCBIN_ALIGN_SHIFT_0 1 +#define INCBIN_ALIGN_SHIFT_1 2 +#define INCBIN_ALIGN_SHIFT_2 4 +#define INCBIN_ALIGN_SHIFT_3 8 +#define INCBIN_ALIGN_SHIFT_4 16 +#define INCBIN_ALIGN_SHIFT_5 32 +#define INCBIN_ALIGN_SHIFT_6 64 + +/* Actual alignment value */ +#define INCBIN_ALIGNMENT \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ + INCBIN_ALIGNMENT_INDEX) + +/* Stringize */ +#define INCBIN_STR(X) \ + #X +#define INCBIN_STRINGIZE(X) \ + INCBIN_STR(X) +/* Concatenate */ +#define INCBIN_CAT(X, Y) \ + X ## Y +#define INCBIN_CONCATENATE(X, Y) \ + INCBIN_CAT(X, Y) +/* Deferred macro expansion */ +#define INCBIN_EVAL(X) \ + X +#define INCBIN_INVOKE(N, ...) \ + INCBIN_EVAL(N(__VA_ARGS__)) + +/* Green Hills uses a different directive for including binary data */ +#if defined(__ghs__) +# if (__ghs_asm == 2) +# define INCBIN_MACRO ".file" +/* Or consider the ".myrawdata" entry in the ld file */ +# else +# define INCBIN_MACRO "\tINCBIN" +# endif +#else +# define INCBIN_MACRO ".incbin" +#endif + +#ifndef _MSC_VER +# define INCBIN_ALIGN \ + __attribute__((aligned(INCBIN_ALIGNMENT))) +#else +# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) +#endif + +#if defined(__arm__) || /* GNU C and RealView */ \ + defined(__arm) || /* Diab */ \ + defined(_ARM) /* ImageCraft */ +# define INCBIN_ARM +#endif + +#ifdef __GNUC__ +/* Utilize .balign where supported */ +# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".balign 1\n" +#elif defined(INCBIN_ARM) +/* + * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is + * the shift count. This is the value passed to `.align' + */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" +# define INCBIN_ALIGN_BYTE ".align 0\n" +#else +/* We assume other inline assembler's treat `.align' as `.balign' */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".align 1\n" +#endif + +/* INCBIN_CONST is used by incbin.c generated files */ +#if defined(__cplusplus) +# define INCBIN_EXTERNAL extern "C" +# define INCBIN_CONST extern const +#else +# define INCBIN_EXTERNAL extern +# define INCBIN_CONST const +#endif + +/** + * @brief Optionally override the linker section into which data is emitted. + * + * @warning If you use this facility, you'll have to deal with platform-specific linker output + * section naming on your own + * + * Overriding the default linker output section, e.g for esp8266/Arduino: + * @code + * #define INCBIN_OUTPUT_SECTION ".irom.text" + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * // Data is emitted into program memory that never gets copied to RAM + * @endcode + */ +#if !defined(INCBIN_OUTPUT_SECTION) +# if defined(__APPLE__) +# define INCBIN_OUTPUT_SECTION ".const_data" +# else +# define INCBIN_OUTPUT_SECTION ".rodata" +# endif +#endif + +#if defined(__APPLE__) +/* The directives are different for Apple branded compilers */ +# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# define INCBIN_INT ".long " +# define INCBIN_MANGLE "_" +# define INCBIN_BYTE ".byte " +# define INCBIN_TYPE(...) +#else +# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# if defined(__ghs__) +# define INCBIN_INT ".word " +# else +# define INCBIN_INT ".int " +# endif +# if defined(__USER_LABEL_PREFIX__) +# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) +# else +# define INCBIN_MANGLE "" +# endif +# if defined(INCBIN_ARM) +/* On arm assemblers, `@' is used as a line comment token */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" +# elif defined(__MINGW32__) || defined(__MINGW64__) +/* Mingw doesn't support this directive either */ +# define INCBIN_TYPE(NAME) +# else +/* It's safe to use `@' on other architectures */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" +# endif +# define INCBIN_BYTE ".byte " +#endif + +/* List of style types used for symbol names */ +#define INCBIN_STYLE_CAMEL 0 +#define INCBIN_STYLE_SNAKE 1 + +/** + * @brief Specify the prefix to use for symbol names. + * + * By default this is `g', producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char gFooData[]; + * // const unsigned char *const gFooEnd; + * // const unsigned int gFooSize; + * @endcode + * + * If however you specify a prefix before including: e.g: + * @code + * #define INCBIN_PREFIX incbin + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols instead: + * // const unsigned char incbinFooData[]; + * // const unsigned char *const incbinFooEnd; + * // const unsigned int incbinFooSize; + * @endcode + */ +#if !defined(INCBIN_PREFIX) +# define INCBIN_PREFIX g +#endif + +/** + * @brief Specify the style used for symbol names. + * + * Possible options are + * - INCBIN_STYLE_CAMEL "CamelCase" + * - INCBIN_STYLE_SNAKE "snake_case" + * + * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char FooData[]; + * // const unsigned char *const FooEnd; + * // const unsigned int FooSize; + * @endcode + * + * If however you specify a style before including: e.g: + * @code + * #define INCBIN_STYLE INCBIN_STYLE_SNAKE + * #include "incbin.h" + * INCBIN(foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char foo_data[]; + * // const unsigned char *const foo_end; + * // const unsigned int foo_size; + * @endcode + */ +#if !defined(INCBIN_STYLE) +# define INCBIN_STYLE INCBIN_STYLE_CAMEL +#endif + +/* Style lookup tables */ +#define INCBIN_STYLE_0_DATA Data +#define INCBIN_STYLE_0_END End +#define INCBIN_STYLE_0_SIZE Size +#define INCBIN_STYLE_1_DATA _data +#define INCBIN_STYLE_1_END _end +#define INCBIN_STYLE_1_SIZE _size + +/* Style lookup: returning identifier */ +#define INCBIN_STYLE_IDENT(TYPE) \ + INCBIN_CONCATENATE( \ + INCBIN_STYLE_, \ + INCBIN_CONCATENATE( \ + INCBIN_EVAL(INCBIN_STYLE), \ + INCBIN_CONCATENATE(_, TYPE))) + +/* Style lookup: returning string literal */ +#define INCBIN_STYLE_STRING(TYPE) \ + INCBIN_STRINGIZE( \ + INCBIN_STYLE_IDENT(TYPE)) \ + +/* Generate the global labels by indirectly invoking the macro with our style + * type and concatenating the name against them. */ +#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ + INCBIN_INVOKE( \ + INCBIN_GLOBAL, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) \ + INCBIN_INVOKE( \ + INCBIN_TYPE, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) + +/** + * @brief Externally reference binary data included in another translation unit. + * + * Produces three external symbols that reference the binary data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name given for the binary data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const unsigned char FooData[]; + * // extern const unsigned char *const FooEnd; + * // extern const unsigned int FooSize; + * @endcode + */ +#define INCBIN_EXTERN(NAME) \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(DATA))[]; \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(END)); \ + INCBIN_EXTERNAL const unsigned int \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(SIZE)) + +/** + * @brief Include a binary file into the current translation unit. + * + * Includes a binary file into the current translation unit, producing three symbols + * for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCBIN(Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const unsigned char IconData[]; + * // const unsigned char *const IconEnd; + * // const unsigned int IconSize; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#ifdef _MSC_VER +#define INCBIN(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +#define INCBIN(NAME, FILENAME) \ + __asm__(INCBIN_SECTION \ + INCBIN_GLOBAL_LABELS(NAME, DATA) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ + INCBIN_MACRO " \"" FILENAME "\"\n" \ + INCBIN_GLOBAL_LABELS(NAME, END) \ + INCBIN_ALIGN_BYTE \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ + INCBIN_BYTE "1\n" \ + INCBIN_GLOBAL_LABELS(NAME, SIZE) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ + INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ + INCBIN_ALIGN_HOST \ + ".text\n" \ + ); \ + INCBIN_EXTERN(NAME) + +#endif +#endif diff --git a/src/main.cpp b/src/main.cpp index fc2f4029..94c6a2a7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -37,6 +37,7 @@ int main(int argc, char* argv[]) { if (Cluster::is_root()) std::cout << engine_info() << std::endl; + CommandLine::init(argc, argv); UCI::init(Options); Tune::init(); PSQT::init(); diff --git a/src/material.cpp b/src/material.cpp index 0ef9926f..870a5e11 100644 --- a/src/material.cpp +++ b/src/material.cpp @@ -130,7 +130,7 @@ Entry* probe(const Position& pos) { Value npm_w = pos.non_pawn_material(WHITE); Value npm_b = pos.non_pawn_material(BLACK); - Value npm = Utility::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); + Value npm = std::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); // Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME] e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit)); diff --git a/src/misc.cpp b/src/misc.cpp index 459ea100..3fbdea35 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -132,6 +132,7 @@ public: } // namespace + /// engine_info() returns the full name of the current Stockfish version. This /// will be either "Stockfish DD-MM-YY" (where DD-MM-YY is the date when /// the program was compiled) or "Stockfish ", depending on whether @@ -328,16 +329,16 @@ void prefetch(void* addr) { #endif -/// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc. -/// Memory allocated with std_aligned_alloc must be freed with std_aligned_free. -/// + +/// std_aligned_alloc() is our wrapper for systems where the c++17 implementation +/// does not guarantee the availability of aligned_alloc(). Memory allocated with +/// std_aligned_alloc() must be freed with std_aligned_free(). void* std_aligned_alloc(size_t alignment, size_t size) { + #if defined(POSIXALIGNEDALLOC) - void *pointer; - if(posix_memalign(&pointer, alignment, size) == 0) - return pointer; - return nullptr; + void *mem; + return posix_memalign(&mem, alignment, size) ? nullptr : mem; #elif defined(_WIN32) return _mm_malloc(size, alignment); #else @@ -346,6 +347,7 @@ void* std_aligned_alloc(size_t alignment, size_t size) { } void std_aligned_free(void* ptr) { + #if defined(POSIXALIGNEDALLOC) free(ptr); #elif defined(_WIN32) @@ -355,7 +357,7 @@ void std_aligned_free(void* ptr) { #endif } -/// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages. +/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages. /// The returned pointer is the aligned one, while the mem argument is the one that needs /// to be passed to free. With c++17 some of this functionality could be simplified. @@ -367,7 +369,9 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment if (posix_memalign(&mem, alignment, size)) mem = nullptr; +#if defined(MADV_HUGEPAGE) madvise(mem, allocSize, MADV_HUGEPAGE); +#endif return mem; } @@ -586,3 +590,61 @@ void bindThisThread(size_t idx) { #endif } // namespace WinProcGroup + +#ifdef _WIN32 +#include +#define GETCWD _getcwd +#else +#include +#define GETCWD getcwd +#endif + +namespace CommandLine { + +string argv0; // path+name of the executable binary, as given by argv[0] +string binaryDirectory; // path of the executable directory +string workingDirectory; // path of the working directory +string pathSeparator; // Separator for our current OS + +void init(int argc, char* argv[]) { + (void)argc; + string separator; + + // extract the path+name of the executable binary + argv0 = argv[0]; + +#ifdef _WIN32 + pathSeparator = "\\"; + #ifdef _MSC_VER + // Under windows argv[0] may not have the extension. Also _get_pgmptr() had + // issues in some windows 10 versions, so check returned values carefully. + char* pgmptr = nullptr; + if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) + argv0 = pgmptr; + #endif +#else + pathSeparator = "/"; +#endif + + // extract the working directory + workingDirectory = ""; + char buff[40000]; + char* cwd = GETCWD(buff, 40000); + if (cwd) + workingDirectory = cwd; + + // extract the binary directory path from argv0 + binaryDirectory = argv0; + size_t pos = binaryDirectory.find_last_of("\\/"); + if (pos == std::string::npos) + binaryDirectory = "." + pathSeparator; + else + binaryDirectory.resize(pos + 1); + + // pattern replacement: "./" at the start of path is replaced by the working directory + if (binaryDirectory.find("." + pathSeparator) == 0) + binaryDirectory.replace(0, 1, workingDirectory); +} + + +} // namespace CommandLine diff --git a/src/misc.h b/src/misc.h index eb4e05c0..68b9c884 100644 --- a/src/misc.h +++ b/src/misc.h @@ -42,9 +42,7 @@ void dbg_mean_of(int v); void dbg_print(); typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds - static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); - inline TimePoint now() { return std::chrono::duration_cast (std::chrono::steady_clock::now().time_since_epoch()).count(); @@ -65,14 +63,6 @@ std::ostream& operator<<(std::ostream&, SyncCout); #define sync_cout std::cout << IO_LOCK #define sync_endl std::endl << IO_UNLOCK -namespace Utility { - -/// Clamp a value between lo and hi. Available in c++17. -template constexpr const T& clamp(const T& v, const T& lo, const T& hi) { - return v < lo ? lo : v > hi ? hi : v; -} - -} /// xorshift64star Pseudo-Random Number Generator /// This class is based on original code written and dedicated @@ -134,4 +124,11 @@ namespace WinProcGroup { void bindThisThread(size_t idx); } +namespace CommandLine { + void init(int argc, char* argv[]); + + extern std::string binaryDirectory; // path of the executable directory + extern std::string workingDirectory; // path of the working directory +} + #endif // #ifndef MISC_H_INCLUDED diff --git a/src/movegen.cpp b/src/movegen.cpp index d74df4c3..3340f65c 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -248,7 +248,7 @@ namespace { *moveList++ = make_move(ksq, pop_lsb(&b)); if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING)) - for(CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) + for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) if (!pos.castling_impeded(cr) && pos.can_castle(cr)) *moveList++ = make(ksq, pos.castling_rook_square(cr)); } diff --git a/src/movepick.cpp b/src/movepick.cpp index 96a44449..153d323e 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -182,7 +182,7 @@ top: --endMoves; ++stage; - /* fallthrough */ + [[fallthrough]]; case REFUTATION: if (select([&](){ return *cur != MOVE_NONE @@ -190,7 +190,7 @@ top: && pos.pseudo_legal(*cur); })) return *(cur - 1); ++stage; - /* fallthrough */ + [[fallthrough]]; case QUIET_INIT: if (!skipQuiets) @@ -203,7 +203,7 @@ top: } ++stage; - /* fallthrough */ + [[fallthrough]]; case QUIET: if ( !skipQuiets @@ -217,7 +217,7 @@ top: endMoves = endBadCaptures; ++stage; - /* fallthrough */ + [[fallthrough]]; case BAD_CAPTURE: return select([](){ return true; }); @@ -228,7 +228,7 @@ top: score(); ++stage; - /* fallthrough */ + [[fallthrough]]; case EVASION: return select([](){ return true; }); @@ -246,14 +246,14 @@ top: return MOVE_NONE; ++stage; - /* fallthrough */ + [[fallthrough]]; case QCHECK_INIT: cur = moves; endMoves = generate(pos, cur); ++stage; - /* fallthrough */ + [[fallthrough]]; case QCHECK: return select([](){ return true; }); diff --git a/src/movepick.h b/src/movepick.h index f080935a..4c0ad551 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -86,9 +86,9 @@ enum StatsType { NoCaptures, Captures }; /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards typedef Stats ButterflyHistory; -/// At higher depths LowPlyHistory records successful quiet moves near the root and quiet -/// moves which are/were in the PV (ttPv) -/// It is cleared with each new search and filled during iterative deepening +/// At higher depths LowPlyHistory records successful quiet moves near the root +/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new +/// search and filled during iterative deepening. constexpr int MAX_LPH = 4; typedef Stats LowPlyHistory; diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index dfbb1ac2..ed138881 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -18,7 +18,6 @@ // Code for calculating NNUE evaluation function -#include #include #include @@ -29,30 +28,29 @@ #include "evaluate_nnue.h" -ExtPieceSquare kpp_board_index[PIECE_NB] = { - // convention: W - us, B - them - // viewed from other side, W and B are reversed - { PS_NONE, PS_NONE }, - { PS_W_PAWN, PS_B_PAWN }, - { PS_W_KNIGHT, PS_B_KNIGHT }, - { PS_W_BISHOP, PS_B_BISHOP }, - { PS_W_ROOK, PS_B_ROOK }, - { PS_W_QUEEN, PS_B_QUEEN }, - { PS_W_KING, PS_B_KING }, - { PS_NONE, PS_NONE }, - { PS_NONE, PS_NONE }, - { PS_B_PAWN, PS_W_PAWN }, - { PS_B_KNIGHT, PS_W_KNIGHT }, - { PS_B_BISHOP, PS_W_BISHOP }, - { PS_B_ROOK, PS_W_ROOK }, - { PS_B_QUEEN, PS_W_QUEEN }, - { PS_B_KING, PS_W_KING }, - { PS_NONE, PS_NONE } -}; - - namespace Eval::NNUE { + uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = { + // convention: W - us, B - them + // viewed from other side, W and B are reversed + { PS_NONE, PS_NONE }, + { PS_W_PAWN, PS_B_PAWN }, + { PS_W_KNIGHT, PS_B_KNIGHT }, + { PS_W_BISHOP, PS_B_BISHOP }, + { PS_W_ROOK, PS_B_ROOK }, + { PS_W_QUEEN, PS_B_QUEEN }, + { PS_W_KING, PS_B_KING }, + { PS_NONE, PS_NONE }, + { PS_NONE, PS_NONE }, + { PS_B_PAWN, PS_W_PAWN }, + { PS_B_KNIGHT, PS_W_KNIGHT }, + { PS_B_BISHOP, PS_W_BISHOP }, + { PS_B_ROOK, PS_W_ROOK }, + { PS_B_QUEEN, PS_W_QUEEN }, + { PS_B_KING, PS_W_KING }, + { PS_NONE, PS_NONE } + }; + // Input feature converter AlignedPtr feature_transformer; @@ -117,59 +115,24 @@ namespace Eval::NNUE { return stream && stream.peek() == std::ios::traits_type::eof(); } - // Proceed with the difference calculation if possible - static void UpdateAccumulatorIfPossible(const Position& pos) { - - feature_transformer->UpdateAccumulatorIfPossible(pos); - } - - // Calculate the evaluation value - static Value ComputeScore(const Position& pos, bool refresh) { - - auto& accumulator = pos.state()->accumulator; - if (!refresh && accumulator.computed_score) { - return accumulator.score; - } + // Evaluation function. Perform differential calculation. + Value evaluate(const Position& pos) { alignas(kCacheLineSize) TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize]; - feature_transformer->Transform(pos, transformed_features, refresh); + feature_transformer->Transform(pos, transformed_features); alignas(kCacheLineSize) char buffer[Network::kBufferSize]; const auto output = network->Propagate(transformed_features, buffer); - auto score = static_cast(output[0] / FV_SCALE); - - accumulator.score = score; - accumulator.computed_score = true; - return accumulator.score; + return static_cast(output[0] / FV_SCALE); } - // Load the evaluation function file - bool load_eval_file(const std::string& evalFile) { + // Load eval, from a file stream or a memory stream + bool load_eval(std::string streamName, std::istream& stream) { Initialize(); - fileName = evalFile; - - std::ifstream stream(evalFile, std::ios::binary); - - const bool result = ReadParameters(stream); - - return result; - } - - // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos) { - return ComputeScore(pos, false); - } - - // Evaluation function. Perform full calculation. - Value compute_eval(const Position& pos) { - return ComputeScore(pos, true); - } - - // Proceed with the difference calculation if possible - void update_eval(const Position& pos) { - UpdateAccumulatorIfPossible(pos); + fileName = streamName; + return ReadParameters(stream); } } // namespace Eval::NNUE diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h index 79ca83ae..558a6b22 100644 --- a/src/nnue/features/feature_set.h +++ b/src/nnue/features/feature_set.h @@ -68,8 +68,7 @@ namespace Eval::NNUE::Features { reset[perspective] = false; switch (trigger) { case TriggerEvent::kFriendKingMoved: - reset[perspective] = - dp.pieceId[0] == PIECE_ID_KING + perspective; + reset[perspective] = dp.piece[0] == make_piece(perspective, KING); break; default: assert(false); diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp index 628add6e..88e384a3 100644 --- a/src/nnue/features/half_kp.cpp +++ b/src/nnue/features/half_kp.cpp @@ -23,25 +23,17 @@ namespace Eval::NNUE::Features { - // Find the index of the feature quantity from the king position and PieceSquare - template - inline IndexType HalfKP::MakeIndex(Square sq_k, PieceSquare p) { - return static_cast(PS_END) * static_cast(sq_k) + p; + // Orient a square according to perspective (rotates by 180 for black) + inline Square orient(Color perspective, Square s) { + return Square(int(s) ^ (bool(perspective) * 63)); } - // Get pieces information + // Find the index of the feature quantity from the king position and PieceSquare template - inline void HalfKP::GetPieces( - const Position& pos, Color perspective, - PieceSquare** pieces, Square* sq_target_k) { + inline IndexType HalfKP::MakeIndex( + Color perspective, Square s, Piece pc, Square ksq) { - *pieces = (perspective == BLACK) ? - pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - const PieceId target = (AssociatedKing == Side::kFriend) ? - static_cast(PIECE_ID_KING + perspective) : - static_cast(PIECE_ID_KING + ~perspective); - *sq_target_k = static_cast(((*pieces)[target] - PS_W_KING) % SQUARE_NB); + return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END * ksq); } // Get a list of indices for active features @@ -49,16 +41,11 @@ namespace Eval::NNUE::Features { void HalfKP::AppendActiveIndices( const Position& pos, Color perspective, IndexList* active) { - // Do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - PieceSquare* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); - for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) { - if (pieces[i] != PS_NONE) { - active->push_back(MakeIndex(sq_target_k, pieces[i])); - } + Square ksq = orient(perspective, pos.square(perspective)); + Bitboard bb = pos.pieces() & ~pos.pieces(KING); + while (bb) { + Square s = pop_lsb(&bb); + active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq)); } } @@ -68,22 +55,15 @@ namespace Eval::NNUE::Features { const Position& pos, Color perspective, IndexList* removed, IndexList* added) { - PieceSquare* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); + Square ksq = orient(perspective, pos.square(perspective)); const auto& dp = pos.state()->dirtyPiece; for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.pieceId[i] >= PIECE_ID_KING) continue; - const auto old_p = static_cast( - dp.old_piece[i].from[perspective]); - if (old_p != PS_NONE) { - removed->push_back(MakeIndex(sq_target_k, old_p)); - } - const auto new_p = static_cast( - dp.new_piece[i].from[perspective]); - if (new_p != PS_NONE) { - added->push_back(MakeIndex(sq_target_k, new_p)); - } + Piece pc = dp.piece[i]; + if (type_of(pc) == KING) continue; + if (dp.from[i] != SQ_NONE) + removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq)); + if (dp.to[i] != SQ_NONE) + added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq)); } } diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_kp.h index 99842eea..ee6a8df3 100644 --- a/src/nnue/features/half_kp.h +++ b/src/nnue/features/half_kp.h @@ -41,7 +41,7 @@ namespace Eval::NNUE::Features { static constexpr IndexType kDimensions = static_cast(SQUARE_NB) * static_cast(PS_END); // Maximum number of simultaneously active features - static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING; + static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count // Trigger for full calculation instead of difference calculation static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved; @@ -53,13 +53,9 @@ namespace Eval::NNUE::Features { static void AppendChangedIndices(const Position& pos, Color perspective, IndexList* removed, IndexList* added); - // Index of a feature for a given king position and another piece on some square - static IndexType MakeIndex(Square sq_k, PieceSquare p); - private: - // Get pieces information - static void GetPieces(const Position& pos, Color perspective, - PieceSquare** pieces, Square* sq_target_k); + // Index of a feature for a given king position and another piece on some square + static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k); }; } // namespace Eval::NNUE::Features diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 7ac5a1c0..94d0b5a9 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -85,8 +85,10 @@ namespace Eval::NNUE::Layers { #elif defined(USE_AVX2) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const __m256i kOnes = _mm256_set1_epi16(1); const auto input_vector = reinterpret_cast(input); + #if !defined(USE_VNNI) + const __m256i kOnes = _mm256_set1_epi16(1); + #endif #elif defined(USE_SSE2) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; @@ -145,9 +147,13 @@ namespace Eval::NNUE::Layers { __m256i sum = _mm256_setzero_si256(); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { + #if defined(USE_VNNI) + sum = _mm256_dpbusd_epi32(sum, _mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j])); + #else __m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j])); product = _mm256_madd_epi16(product, kOnes); sum = _mm256_add_epi32(sum, product); + #endif } __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 69dfaad2..26370710 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -29,9 +29,7 @@ namespace Eval::NNUE { struct alignas(kCacheLineSize) Accumulator { std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - Value score; bool computed_accumulation; - bool computed_score; }; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 4c93e3d1..7bc905dc 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -47,7 +47,7 @@ // compiled with older g++ crashes because the output memory is not aligned // even though alignas is specified. #if defined(USE_AVX2) -#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) +#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__) #define _mm256_loadA_si256 _mm256_loadu_si256 #define _mm256_storeA_si256 _mm256_storeu_si256 #else @@ -57,7 +57,7 @@ #endif #if defined(USE_AVX512) -#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) +#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__) #define _mm512_loadA_si512 _mm512_loadu_si512 #define _mm512_storeA_si512 _mm512_storeu_si512 #else @@ -94,6 +94,27 @@ namespace Eval::NNUE { constexpr std::size_t kMaxSimdWidth = 32; + // unique number for each piece type on each square + enum { + PS_NONE = 0, + PS_W_PAWN = 1, + PS_B_PAWN = 1 * SQUARE_NB + 1, + PS_W_KNIGHT = 2 * SQUARE_NB + 1, + PS_B_KNIGHT = 3 * SQUARE_NB + 1, + PS_W_BISHOP = 4 * SQUARE_NB + 1, + PS_B_BISHOP = 5 * SQUARE_NB + 1, + PS_W_ROOK = 6 * SQUARE_NB + 1, + PS_B_ROOK = 7 * SQUARE_NB + 1, + PS_W_QUEEN = 8 * SQUARE_NB + 1, + PS_B_QUEEN = 9 * SQUARE_NB + 1, + PS_W_KING = 10 * SQUARE_NB + 1, + PS_END = PS_W_KING, // pieces without kings (pawns included) + PS_B_KING = 11 * SQUARE_NB + 1, + PS_END2 = 12 * SQUARE_NB + 1 + }; + + extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; + // Type of input feature after conversion using TransformedFeatureType = std::uint8_t; using IndexType = std::uint32_t; diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 43707610..e71ee60d 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -29,6 +29,56 @@ namespace Eval::NNUE { + // If vector instructions are enabled, we update and refresh the + // accumulator tile by tile such that each tile fits in the CPU's + // vector registers. + #define TILING + + #ifdef USE_AVX512 + typedef __m512i vec_t; + #define vec_load(a) _mm512_loadA_si512(a) + #define vec_store(a,b) _mm512_storeA_si512(a,b) + #define vec_add_16(a,b) _mm512_add_epi16(a,b) + #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) + static constexpr IndexType kNumRegs = 8; // only 8 are needed + + #elif USE_AVX2 + typedef __m256i vec_t; + #define vec_load(a) _mm256_loadA_si256(a) + #define vec_store(a,b) _mm256_storeA_si256(a,b) + #define vec_add_16(a,b) _mm256_add_epi16(a,b) + #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) + static constexpr IndexType kNumRegs = 16; + + #elif USE_SSE2 + typedef __m128i vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_epi16(a,b) + #define vec_sub_16(a,b) _mm_sub_epi16(a,b) + static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8; + + #elif USE_MMX + typedef __m64 vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_pi16(a,b) + #define vec_sub_16(a,b) _mm_sub_pi16(a,b) + static constexpr IndexType kNumRegs = 8; + + #elif USE_NEON + typedef int16x8_t vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) vaddq_s16(a,b) + #define vec_sub_16(a,b) vsubq_s16(a,b) + static constexpr IndexType kNumRegs = 16; + + #else + #undef TILING + + #endif + // Input feature converter class FeatureTransformer { @@ -36,6 +86,11 @@ namespace Eval::NNUE { // Number of output dimensions for one side static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + #ifdef TILING + static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; + static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); + #endif + public: // Output type using OutputType = TransformedFeatureType; @@ -50,11 +105,13 @@ namespace Eval::NNUE { // Hash value embedded in the evaluation file static constexpr std::uint32_t GetHashValue() { + return RawFeatures::kHashValue ^ kOutputDimensions; } // Read network parameters bool ReadParameters(std::istream& stream) { + for (std::size_t i = 0; i < kHalfDimensions; ++i) biases_[i] = read_little_endian(stream); for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) @@ -64,23 +121,26 @@ namespace Eval::NNUE { // Proceed with the difference calculation if possible bool UpdateAccumulatorIfPossible(const Position& pos) const { + const auto now = pos.state(); - if (now->accumulator.computed_accumulation) { + if (now->accumulator.computed_accumulation) return true; - } + const auto prev = now->previous; if (prev && prev->accumulator.computed_accumulation) { UpdateAccumulator(pos); return true; } + return false; } // Convert input features - void Transform(const Position& pos, OutputType* output, bool refresh) const { - if (refresh || !UpdateAccumulatorIfPossible(pos)) { + void Transform(const Position& pos, OutputType* output) const { + + if (!UpdateAccumulatorIfPossible(pos)) RefreshAccumulator(pos); - } + const auto& accumulation = pos.state()->accumulator.accumulation; #if defined(USE_AVX2) @@ -177,74 +237,58 @@ namespace Eval::NNUE { private: // Calculate cumulative value without using difference calculation void RefreshAccumulator(const Position& pos) const { + auto& accumulator = pos.state()->accumulator; IndexType i = 0; Features::IndexList active_indices[2]; RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], active_indices); for (Color perspective : { WHITE, BLACK }) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - for (const auto index : active_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX512) - auto accumulation = reinterpret_cast<__m512i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - for (IndexType j = 0; j < kNumChunks; ++j) - _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j])); + #ifdef TILING + for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; - #elif defined(USE_AVX2) - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j])); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; - #elif defined(USE_SSE2) - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); - #elif defined(USE_MMX) - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); } - #elif defined(USE_NEON) - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - + for (unsigned k = 0; k < kNumRegs; k++) + vec_store(&accTile[k], acc[k]); + } #else + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - #endif - } + #endif } + #if defined(USE_MMX) _mm_empty(); #endif accumulator.computed_accumulation = true; - accumulator.computed_score = false; } // Calculate cumulative value using difference calculation void UpdateAccumulator(const Position& pos) const { + const auto prev_accumulator = pos.state()->previous->accumulator; auto& accumulator = pos.state()->accumulator; IndexType i = 0; @@ -252,29 +296,55 @@ namespace Eval::NNUE { bool reset[2]; RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], removed_indices, added_indices, reset); - for (Color perspective : { WHITE, BLACK }) { - #if defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); + #ifdef TILING + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { + for (Color perspective : { WHITE, BLACK }) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; - #elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); + if (reset[perspective]) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + } else { + auto prevAccTile = reinterpret_cast( + &prev_accumulator.accumulation[perspective][i][j * kTileHeight]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_load(&prevAccTile[k]); - #elif defined(USE_MMX) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); - #elif defined(USE_NEON) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + } + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + } + + for (IndexType k = 0; k < kNumRegs; ++k) + vec_store(&accTile[k], acc[k]); + } + } + #if defined(USE_MMX) + _mm_empty(); #endif + #else + for (Color perspective : { WHITE, BLACK }) { + if (reset[perspective]) { std::memcpy(accumulator.accumulation[perspective][i], biases_, kHalfDimensions * sizeof(BiasType)); @@ -286,83 +356,22 @@ namespace Eval::NNUE { for (const auto index : removed_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); - } - - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); - } - - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]); - } - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vsubq_s16(accumulation[j], column[j]); - } - - #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] -= - weights_[offset + j]; - } - #endif - + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; } } { // Difference calculation for the activated features for (const auto index : added_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - } - - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - } - - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - } - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } - - #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] += - weights_[offset + j]; - } - #endif - + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; } } } - #if defined(USE_MMX) - _mm_empty(); #endif accumulator.computed_accumulation = true; - accumulator.computed_score = false; } using BiasType = std::int16_t; diff --git a/src/pawns.cpp b/src/pawns.cpp index 868d0c8e..af0f6618 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -219,7 +219,7 @@ Score Entry::evaluate_shelter(const Position& pos, Square ksq) const { Score bonus = make_score(5, 5); - File center = Utility::clamp(file_of(ksq), FILE_B, FILE_G); + File center = std::clamp(file_of(ksq), FILE_B, FILE_G); for (File f = File(center - 1); f <= File(center + 1); ++f) { b = ourPawns & file_bb(f); diff --git a/src/position.cpp b/src/position.cpp index 46e5d78b..e6a760d2 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -198,9 +198,6 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE); st = si; - // Each piece on board gets a unique ID used to track the piece later - PieceId piece_id, next_piece_id = PIECE_ID_ZERO; - ss >> std::noskipws; // 1. Piece placement @@ -212,21 +209,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th else if (token == '/') sq += 2 * SOUTH; - else if ((idx = PieceToChar.find(token)) != string::npos) - { - auto pc = Piece(idx); - put_piece(pc, sq); - - if (Eval::useNNUE) - { - // Kings get a fixed ID, other pieces get ID in order of placement - piece_id = - (idx == W_KING) ? PIECE_ID_WKING : - (idx == B_KING) ? PIECE_ID_BKING : - next_piece_id++; - evalList.put_piece(piece_id, sq, pc); - } - + else if ((idx = PieceToChar.find(token)) != string::npos) { + put_piece(Piece(idx), sq); ++sq; } } @@ -720,9 +704,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Used by NNUE st->accumulator.computed_accumulation = false; - st->accumulator.computed_score = false; - PieceId dp0 = PIECE_ID_NONE; - PieceId dp1 = PIECE_ID_NONE; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -775,12 +756,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { if (Eval::useNNUE) { - dp.dirty_num = 2; // 2 pieces moved - dp1 = piece_id_on(capsq); - dp.pieceId[1] = dp1; - dp.old_piece[1] = evalList.piece_with_id(dp1); - evalList.put_piece(dp1, capsq, NO_PIECE); - dp.new_piece[1] = evalList.piece_with_id(dp1); + dp.dirty_num = 2; // 1 piece moved, 1 piece captured + dp.piece[1] = captured; + dp.from[1] = capsq; + dp.to[1] = SQ_NONE; } // Update board and piece lists @@ -821,11 +800,9 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { { if (Eval::useNNUE) { - dp0 = piece_id_on(from); - dp.pieceId[0] = dp0; - dp.old_piece[0] = evalList.piece_with_id(dp0); - evalList.put_piece(dp0, to, pc); - dp.new_piece[0] = evalList.piece_with_id(dp0); + dp.piece[0] = pc; + dp.from[0] = from; + dp.to[0] = to; } move_piece(from, to); @@ -854,9 +831,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { if (Eval::useNNUE) { - dp0 = piece_id_on(to); - evalList.put_piece(dp0, to, promotion); - dp.new_piece[0] = evalList.piece_with_id(dp0); + // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE + dp.to[0] = SQ_NONE; + dp.piece[dp.dirty_num] = promotion; + dp.from[dp.dirty_num] = SQ_NONE; + dp.to[dp.dirty_num] = to; + dp.dirty_num++; } // Update hash keys @@ -950,12 +930,6 @@ void Position::undo_move(Move m) { { move_piece(to, from); // Put the piece back at the source square - if (Eval::useNNUE) - { - PieceId dp0 = st->dirtyPiece.pieceId[0]; - evalList.put_piece(dp0, from, pc); - } - if (st->capturedPiece) { Square capsq = to; @@ -972,14 +946,6 @@ void Position::undo_move(Move m) { } put_piece(st->capturedPiece, capsq); // Restore the captured piece - - if (Eval::useNNUE) - { - PieceId dp1 = st->dirtyPiece.pieceId[1]; - assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE); - assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE); - evalList.put_piece(dp1, capsq, st->capturedPiece); - } } } @@ -1001,32 +967,16 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); - if (Eval::useNNUE) + if (Do && Eval::useNNUE) { - PieceId dp0, dp1; auto& dp = st->dirtyPiece; - dp.dirty_num = 2; // 2 pieces moved - - if (Do) - { - dp0 = piece_id_on(from); - dp1 = piece_id_on(rfrom); - dp.pieceId[0] = dp0; - dp.old_piece[0] = evalList.piece_with_id(dp0); - evalList.put_piece(dp0, to, make_piece(us, KING)); - dp.new_piece[0] = evalList.piece_with_id(dp0); - dp.pieceId[1] = dp1; - dp.old_piece[1] = evalList.piece_with_id(dp1); - evalList.put_piece(dp1, rto, make_piece(us, ROOK)); - dp.new_piece[1] = evalList.piece_with_id(dp1); - } - else - { - dp0 = piece_id_on(to); - dp1 = piece_id_on(rto); - evalList.put_piece(dp0, from, make_piece(us, KING)); - evalList.put_piece(dp1, rfrom, make_piece(us, ROOK)); - } + dp.piece[0] = make_piece(us, KING); + dp.from[0] = from; + dp.to[0] = to; + dp.piece[1] = make_piece(us, ROOK); + dp.from[1] = rfrom; + dp.to[1] = rto; + dp.dirty_num = 2; } // Remove both pieces first since squares could overlap in Chess960 @@ -1049,7 +999,6 @@ void Position::do_null_move(StateInfo& newSt) { if (Eval::useNNUE) { std::memcpy(&newSt, st, sizeof(StateInfo)); - st->accumulator.computed_score = false; } else std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); @@ -1145,8 +1094,8 @@ bool Position::see_ge(Move m, Value threshold) const { // Don't allow pinned pieces to attack (except the king) as long as // there are pinners on their original square. - if (st->pinners[~stm] & occupied) - stmAttackers &= ~st->blockersForKing[stm]; + if (pinners(~stm) & occupied) + stmAttackers &= ~blockers_for_king(stm); if (!stmAttackers) break; diff --git a/src/position.h b/src/position.h index a77050eb..d6f5c9fd 100644 --- a/src/position.h +++ b/src/position.h @@ -113,6 +113,7 @@ public: Bitboard checkers() const; Bitboard blockers_for_king(Color c) const; Bitboard check_squares(PieceType pt) const; + Bitboard pinners(Color c) const; bool is_discovery_check_on_king(Color c, Move m) const; // Attacks to/from a given square @@ -170,7 +171,6 @@ public: // Used by NNUE StateInfo* state() const; - const EvalList* eval_list() const; private: // Initialization helpers (used while setting up a position) @@ -185,9 +185,6 @@ private: template void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); - // ID of a piece on a given square - PieceId piece_id_on(Square sq) const; - // Data members Piece board[SQUARE_NB]; Bitboard byTypeBB[PIECE_TYPE_NB]; @@ -204,9 +201,6 @@ private: Thread* thisThread; StateInfo* st; bool chess960; - - // List of pieces used in NNUE evaluation function - EvalList evalList; }; namespace PSQT { @@ -309,6 +303,10 @@ inline Bitboard Position::blockers_for_king(Color c) const { return st->blockersForKing[c]; } +inline Bitboard Position::pinners(Color c) const { + return st->pinners[c]; +} + inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; } @@ -446,20 +444,4 @@ inline StateInfo* Position::state() const { return st; } -inline const EvalList* Position::eval_list() const { - - return &evalList; -} - -inline PieceId Position::piece_id_on(Square sq) const -{ - - assert(piece_on(sq) != NO_PIECE); - - PieceId pid = evalList.piece_id_list[sq]; - assert(is_ok(pid)); - - return pid; -} - #endif // #ifndef POSITION_H_INCLUDED diff --git a/src/search.cpp b/src/search.cpp index 72bf924f..6fc30be2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -364,7 +364,7 @@ void Thread::search() { // for match (TC 60+0.6) results spanning a wide range of k values. PRNG rng(now()); double floatLevel = Options["UCI_LimitStrength"] ? - Utility::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : + std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : double(Options["Skill Level"]); int intLevel = int(floatLevel) + ((floatLevel - int(floatLevel)) * 1024 > rng.rand() % 1024 ? 1 : 0); @@ -545,7 +545,7 @@ void Thread::search() { { double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue) + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0; - fallingEval = Utility::clamp(fallingEval, 0.5, 1.5); + fallingEval = std::clamp(fallingEval, 0.5, 1.5); // If the bestMove is stable over several iterations, reduce time accordingly timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.92 : 0.95; @@ -557,7 +557,7 @@ void Thread::search() { totBestMoveChanges += th->bestMoveChanges; th->bestMoveChanges = 0; } - double bestMoveInstability = 1 + totBestMoveChanges / Threads.size(); + double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size(); double totalTime = rootMoves.size() == 1 ? 0 : Time.optimum() * fallingEval * reduction * bestMoveInstability; @@ -634,7 +634,7 @@ namespace { Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; + bool formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; Piece movedPiece; @@ -681,6 +681,7 @@ namespace { assert(0 <= ss->ply && ss->ply < MAX_PLY); (ss+1)->ply = ss->ply + 1; + (ss+1)->ttPv = false; (ss+1)->excludedMove = bestMove = MOVE_NONE; (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; Square prevSq = to_sq((ss-1)->currentMove); @@ -690,9 +691,7 @@ namespace { // starts with statScore = 0. Later grandchildren start with the last calculated // statScore of the previous grandchild. This influences the reduction rules in // LMR which are based on the statScore of parent position. - if (rootNode) - (ss+4)->statScore = 0; - else + if (!rootNode) (ss+2)->statScore = 0; // Step 4. Transposition table lookup. We don't want the score of a partial @@ -700,14 +699,15 @@ namespace { // position key in case of an excluded move. excludedMove = ss->excludedMove; posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] - : ttHit ? tte->move() : MOVE_NONE; - ttPv = PvNode || (ttHit && tte->is_pv()); - formerPv = ttPv && !PvNode; + : ss->ttHit ? tte->move() : MOVE_NONE; + if (!excludedMove) + ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); + formerPv = ss->ttPv && !PvNode; - if ( ttPv + if ( ss->ttPv && depth > 12 && ss->ply - 1 < MAX_LPH && !priorCapture @@ -716,11 +716,11 @@ namespace { // thisThread->ttHitAverage can be used to approximate the running average of ttHit thisThread->ttHitAverage = (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow - + TtHitAverageResolution * ttHit; + + TtHitAverageResolution * ss->ttHit; // At non-PV nodes we check for an early TT cutoff if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= depth && ttValue != VALUE_NONE // Possible in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -786,7 +786,7 @@ namespace { || (b == BOUND_LOWER ? value >= beta : value <= alpha)) { Cluster::save(thisThread, tte, - posKey, value_to_tt(value, ss->ply), ttPv, b, + posKey, value_to_tt(value, ss->ply), ss->ttPv, b, std::min(MAX_PLY - 1, depth + 6), MOVE_NONE, VALUE_NONE); @@ -814,7 +814,7 @@ namespace { improving = false; goto moves_loop; } - else if (ttHit) + else if (ss->ttHit) { // Never assume anything about values stored in TT ss->staticEval = eval = tte->eval(); @@ -837,7 +837,7 @@ namespace { ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; Cluster::save(thisThread, tte, - posKey, VALUE_NONE, ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, + posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } @@ -847,8 +847,9 @@ namespace { && eval <= alpha - RazorMargin) return qsearch(pos, ss, alpha, beta); - improving = (ss-2)->staticEval == VALUE_NONE ? (ss->staticEval > (ss-4)->staticEval - || (ss-4)->staticEval == VALUE_NONE) : ss->staticEval > (ss-2)->staticEval; + improving = (ss-2)->staticEval == VALUE_NONE + ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE + : ss->staticEval > (ss-2)->staticEval; // Step 8. Futility pruning: child node (~50 Elo) if ( !PvNode @@ -863,7 +864,7 @@ namespace { && (ss-1)->statScore < 22977 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182 + && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -919,14 +920,14 @@ namespace { // there and in further interactions with transposition table cutoff depth is set to depth - 3 // because probCut search has depth set to depth - 4 but we also do a move before it // so effective depth is equal to depth - 3 - && !( ttHit - && tte->depth() >= depth - 3 + && !( ss->ttHit + && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { // if ttMove is a capture and value from transposition table is good enough produce probCut // cutoff without digging into actual probCut search - if ( ttHit + if ( ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue >= probCutBeta @@ -937,6 +938,8 @@ namespace { assert(probCutBeta < VALUE_INFINITE); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); int probCutCount = 0; + bool ttPv = ss->ttPv; + ss->ttPv = false; while ( (move = mp.next_move()) != MOVE_NONE && probCutCount < 2 + 2 * cutNode) @@ -968,7 +971,7 @@ namespace { if (value >= probCutBeta) { // if transposition table doesn't have equal or more deep info write probCut data into it - if ( !(ttHit + if ( !(ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE)) tte->save(posKey, value_to_tt(value, ss->ply), ttPv, @@ -977,17 +980,14 @@ namespace { return value; } } + ss->ttPv = ttPv; } - // Step 11. Internal iterative deepening (~1 Elo) - if (depth >= 7 && !ttMove) - { - search(pos, ss, alpha, beta, depth - 7, cutNode); - - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ttHit ? tte->move() : MOVE_NONE; - } + // Step 11. If the position is not in TT, decrease depth by 2 + if ( PvNode + && depth >= 6 + && !ttMove) + depth -= 2; moves_loop: // When in check, search starts from here @@ -1096,7 +1096,6 @@ moves_loop: // When in check, search starts from here if ( !givesCheck && lmrDepth < 6 && !(PvNode && abs(bestValue) < 2) - && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && !ss->inCheck && ss->staticEval + 169 + 244 * lmrDepth + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) @@ -1162,9 +1161,9 @@ moves_loop: // When in check, search starts from here && (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move))) extension = 1; - // Castling extension - if ( type_of(move) == CASTLING - && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2) + // Last captures extension + else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg + && pos.non_pawn_material() <= 2 * RookValueMg) extension = 1; // Late irreversible move extension @@ -1193,7 +1192,6 @@ moves_loop: // When in check, search starts from here // re-searched at full depth. if ( depth >= 3 && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) - && (!rootNode || thisThread->best_move_count(move) == 0) && ( !captureOrPromotion || moveCountPruning || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha @@ -1202,13 +1200,6 @@ moves_loop: // When in check, search starts from here { Depth r = reduction(improving, depth, moveCount); - // Decrease reduction at non-check cut nodes for second move at low depths - if ( cutNode - && depth <= 10 - && moveCount <= 2 - && !ss->inCheck) - r--; - // Decrease reduction if the ttHit running average is large if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; @@ -1218,7 +1209,7 @@ moves_loop: // When in check, search starts from here r++; // Decrease reduction if position is or has been on the PV (~10 Elo) - if (ttPv) + if (ss->ttPv) r -= 2; if (moveCountPruning && !formerPv) @@ -1230,7 +1221,7 @@ moves_loop: // When in check, search starts from here // Decrease reduction if ttMove has been singularly extended (~3 Elo) if (singularQuietLMR) - r -= 1 + formerPv; + r--; if (!captureOrPromotion) { @@ -1247,7 +1238,7 @@ moves_loop: // When in check, search starts from here // hence break make_move(). (~2 Elo) else if ( type_of(move) == NORMAL && !pos.see_ge(reverse_move(move))) - r -= 2 + ttPv - (type_of(movedPiece) == PAWN); + r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN); ss->statScore = thisThread->mainHistory[us][from_to(move)] + (*contHist[0])[movedPiece][to_sq(move)] @@ -1277,7 +1268,7 @@ moves_loop: // When in check, search starts from here r++; } - Depth d = Utility::clamp(newDepth - r, 1, newDepth); + Depth d = std::clamp(newDepth - r, 1, newDepth); value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); @@ -1425,9 +1416,18 @@ moves_loop: // When in check, search starts from here if (PvNode) bestValue = std::min(bestValue, maxValue); + // If no good move is found and the previous position was ttPv, then the previous + // opponent move is probably good and the new position is added to the search tree. + if (bestValue <= alpha) + ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); + // Otherwise, a counter move has been found and if the position is the last leaf + // in the search tree, remove the position from the search tree. + else if (depth > 3) + ss->ttPv = ss->ttPv && (ss+1)->ttPv; + if (!excludedMove && !(rootNode && thisThread->pvIdx)) Cluster::save(thisThread, tte, - posKey, value_to_tt(bestValue, ss->ply), ttPv, + posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, bestValue >= beta ? BOUND_LOWER : PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER, depth, bestMove, ss->staticEval); @@ -1456,7 +1456,7 @@ moves_loop: // When in check, search starts from here Move ttMove, move, bestMove; Depth ttDepth; Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha; - bool ttHit, pvHit, givesCheck, captureOrPromotion; + bool pvHit, givesCheck, captureOrPromotion; int moveCount; if (PvNode) @@ -1486,13 +1486,13 @@ moves_loop: // When in check, search starts from here : DEPTH_QS_NO_CHECKS; // Transposition table lookup posKey = pos.key(); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ttHit ? tte->move() : MOVE_NONE; - pvHit = ttHit && tte->is_pv(); + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = ss->ttHit ? tte->move() : MOVE_NONE; + pvHit = ss->ttHit && tte->is_pv(); if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= ttDepth && ttValue != VALUE_NONE // Only in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -1507,7 +1507,7 @@ moves_loop: // When in check, search starts from here } else { - if (ttHit) + if (ss->ttHit) { // Never assume anything about values stored in TT if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) @@ -1526,7 +1526,7 @@ moves_loop: // When in check, search starts from here // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) { - if (!ttHit) + if (!ss->ttHit) Cluster::save(thisThread, tte, posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_NONE, MOVE_NONE, ss->staticEval); @@ -1571,6 +1571,10 @@ moves_loop: // When in check, search starts from here { assert(type_of(move) != ENPASSANT); // Due to !pos.advanced_pawn_push + // moveCount pruning + if (moveCount > 2) + continue; + futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; if (futilityValue <= alpha) @@ -1587,7 +1591,9 @@ moves_loop: // When in check, search starts from here } // Do not search moves with negative SEE values - if ( !ss->inCheck && !pos.see_ge(move)) + if ( !ss->inCheck + && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move)) + && !pos.see_ge(move)) continue; // Speculative prefetch as early as possible @@ -1606,6 +1612,12 @@ moves_loop: // When in check, search starts from here [pos.moved_piece(move)] [to_sq(move)]; + if ( !captureOrPromotion + && moveCount + && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold + && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold) + continue; + // Make and search the move pos.do_move(move, st, givesCheck); value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); @@ -1734,8 +1746,8 @@ moves_loop: // When in check, search starts from here else captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1; - // Extra penalty for a quiet TT or main killer move in previous ply when it gets refuted - if ( ((ss-1)->moveCount == 1 || ((ss-1)->currentMove == (ss-1)->killers[0])) + // Extra penalty for a quiet early move that was not a TT move or main killer move in previous ply when it gets refuted + if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) && !pos.captured_piece()) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1); @@ -1881,12 +1893,15 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { { bool updated = rootMoves[i].score != -VALUE_INFINITE; - if (depth == 1 && !updated) + if (depth == 1 && !updated && i > 0) continue; - Depth d = updated ? depth : depth - 1; + Depth d = updated ? depth : std::max(1, depth - 1); Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore; + if (v == -VALUE_INFINITE) + v = VALUE_ZERO; + bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; v = tb ? rootMoves[i].tbScore : v; @@ -1984,7 +1999,7 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { if (RootInTB) { // Sort moves according to TB rank - std::sort(rootMoves.begin(), rootMoves.end(), + std::stable_sort(rootMoves.begin(), rootMoves.end(), [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } ); // Probe during search only if DTZ is not available and we are winning diff --git a/src/search.h b/src/search.h index 601ccf82..fb42d5e1 100644 --- a/src/search.h +++ b/src/search.h @@ -49,6 +49,8 @@ struct Stack { int statScore; int moveCount; bool inCheck; + bool ttPv; + bool ttHit; }; diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index eefb173c..39535c68 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -224,7 +224,9 @@ public: *mapping = statbuf.st_size; *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); +#if defined(MADV_RANDOM) madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); +#endif ::close(fd); if (*baseAddress == MAP_FAILED) @@ -759,7 +761,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu if (entry->hasPawns) { idx = LeadPawnIdx[leadPawnsCnt][squares[0]]; - std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp); + std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp); for (int i = 1; i < leadPawnsCnt; ++i) idx += Binomial[i][MapPawns[squares[i]]]; @@ -860,7 +862,7 @@ encode_remaining: while (d->groupLen[++next]) { - std::sort(groupSq, groupSq + d->groupLen[next]); + std::stable_sort(groupSq, groupSq + d->groupLen[next]); uint64_t n = 0; // Map down a square if "comes later" than a square in the previous diff --git a/src/thread.cpp b/src/thread.cpp index 2c1242ee..c96c1b60 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -51,17 +51,6 @@ Thread::~Thread() { } -/// Thread::bestMoveCount(Move move) return best move counter for the given root move - -int Thread::best_move_count(Move move) const { - - auto rm = std::find(rootMoves.begin() + pvIdx, - rootMoves.begin() + pvLast, move); - - return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0; -} - - /// Thread::clear() reset histories, usually before a new game void Thread::clear() { diff --git a/src/thread.h b/src/thread.h index 6927c6c5..62b51a1b 100644 --- a/src/thread.h +++ b/src/thread.h @@ -55,7 +55,6 @@ public: void idle_loop(); void start_searching(); void wait_for_search_finished(); - int best_move_count(Move move) const; Pawns::Table pawnsTable; Material::Table materialTable; diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h index c4b55a48..75ef5d9a 100644 --- a/src/thread_win32_osx.h +++ b/src/thread_win32_osx.h @@ -27,7 +27,7 @@ /// The implementation calls pthread_create() with the stack size parameter /// equal to the linux 8MB default, on platforms that support it. -#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) +#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS) #include diff --git a/src/timeman.cpp b/src/timeman.cpp index df4ba9b2..6d9c95ef 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -38,9 +38,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { TimePoint slowMover = TimePoint(Options["Slow Mover"]); TimePoint npmsec = TimePoint(Options["nodestime"]); - // opt_scale is a percentage of available time to use for the current move. - // max_scale is a multiplier applied to optimumTime. - double opt_scale, max_scale; + // optScale is a percentage of available time to use for the current move. + // maxScale is a multiplier applied to optimumTime. + double optScale, maxScale; // If we have to play in 'nodes as time' mode, then convert from time // to nodes, and use resulting values in time management formulas. @@ -75,22 +75,22 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { // game time for the current move, so also cap to 20% of available game time. if (limits.movestogo == 0) { - opt_scale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0, + optScale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0, 0.2 * limits.time[us] / double(timeLeft)); - max_scale = std::min(7.0, 4.0 + ply / 12.0); + maxScale = std::min(7.0, 4.0 + ply / 12.0); } // x moves in y seconds (+ z increment) else { - opt_scale = std::min((0.8 + ply / 128.0) / mtg, + optScale = std::min((0.8 + ply / 128.0) / mtg, 0.8 * limits.time[us] / double(timeLeft)); - max_scale = std::min(6.3, 1.5 + 0.11 * mtg); + maxScale = std::min(6.3, 1.5 + 0.11 * mtg); } // Never use more than 80% of the available time for this move - optimumTime = TimePoint(opt_scale * timeLeft); - maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, max_scale * optimumTime)); + optimumTime = TimePoint(optScale * timeLeft); + maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime)); if (Options["Ponder"]) optimumTime += optimumTime / 4; diff --git a/src/tt.cpp b/src/tt.cpp index d494c27d..60a3a5f1 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -37,18 +37,19 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) if (m || (uint16_t)k != key16) move16 = (uint16_t)m; - // Overwrite less valuable entries - if ((uint16_t)k != key16 - || d - DEPTH_OFFSET > depth8 - 4 - || b == BOUND_EXACT) + // Overwrite less valuable entries (cheapest checks first) + if (b == BOUND_EXACT + || (uint16_t)k != key16 + || d - DEPTH_OFFSET > depth8 - 4) { - assert(d >= DEPTH_OFFSET); + assert(d > DEPTH_OFFSET); + assert(d < 256 + DEPTH_OFFSET); key16 = (uint16_t)k; + depth8 = (uint8_t)(d - DEPTH_OFFSET); + genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b); value16 = (int16_t)v; eval16 = (int16_t)ev; - genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b); - depth8 = (uint8_t)(d - DEPTH_OFFSET); } } @@ -119,11 +120,11 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const { const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster for (int i = 0; i < ClusterSize; ++i) - if (!tte[i].key16 || tte[i].key16 == key16) + if (tte[i].key16 == key16 || !tte[i].depth8) { tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh - return found = (bool)tte[i].key16, &tte[i]; + return found = (bool)tte[i].depth8, &tte[i]; } // Find an entry to be replaced according to the replacement strategy @@ -149,7 +150,7 @@ int TranspositionTable::hashfull() const { int cnt = 0; for (int i = 0; i < 1000; ++i) for (int j = 0; j < ClusterSize; ++j) - cnt += (table[i].entry[j].genBound8 & 0xF8) == generation8; + cnt += table[i].entry[j].depth8 && (table[i].entry[j].genBound8 & 0xF8) == generation8; return cnt / ClusterSize; } diff --git a/src/tt.h b/src/tt.h index 13a87112..ce7234e2 100644 --- a/src/tt.h +++ b/src/tt.h @@ -30,13 +30,13 @@ namespace Cluster { /// TTEntry struct is the 10 bytes transposition table entry, defined as below: /// /// key 16 bit -/// move 16 bit -/// value 16 bit -/// eval value 16 bit +/// depth 8 bit /// generation 5 bit /// pv node 1 bit /// bound type 2 bit -/// depth 8 bit +/// move 16 bit +/// value 16 bit +/// eval value 16 bit struct TTEntry { @@ -53,11 +53,11 @@ private: friend void Cluster::init(); uint16_t key16; + uint8_t depth8; + uint8_t genBound8; uint16_t move16; int16_t value16; int16_t eval16; - uint8_t genBound8; - uint8_t depth8; }; diff --git a/src/types.h b/src/types.h index 73da41e2..5873c698 100644 --- a/src/types.h +++ b/src/types.h @@ -201,22 +201,6 @@ enum Piece { PIECE_NB = 16 }; -// An ID used to track the pieces. Max. 32 pieces on board. -enum PieceId { - PIECE_ID_ZERO = 0, - PIECE_ID_KING = 30, - PIECE_ID_WKING = 30, - PIECE_ID_BKING = 31, - PIECE_ID_NONE = 32 -}; - -inline PieceId operator++(PieceId& d, int) { - - PieceId x = d; - d = PieceId(int(d) + 1); - return x; -} - constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO, VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO }, @@ -232,7 +216,8 @@ enum : int { DEPTH_QS_RECAPTURES = -5, DEPTH_NONE = -6, - DEPTH_OFFSET = DEPTH_NONE + + DEPTH_OFFSET = -7 // value used only for TT entry occupancy check }; enum Square : int { @@ -270,93 +255,20 @@ enum Rank : int { RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB }; -// unique number for each piece type on each square -enum PieceSquare : uint32_t { - PS_NONE = 0, - PS_W_PAWN = 1, - PS_B_PAWN = 1 * SQUARE_NB + 1, - PS_W_KNIGHT = 2 * SQUARE_NB + 1, - PS_B_KNIGHT = 3 * SQUARE_NB + 1, - PS_W_BISHOP = 4 * SQUARE_NB + 1, - PS_B_BISHOP = 5 * SQUARE_NB + 1, - PS_W_ROOK = 6 * SQUARE_NB + 1, - PS_B_ROOK = 7 * SQUARE_NB + 1, - PS_W_QUEEN = 8 * SQUARE_NB + 1, - PS_B_QUEEN = 9 * SQUARE_NB + 1, - PS_W_KING = 10 * SQUARE_NB + 1, - PS_END = PS_W_KING, // pieces without kings (pawns included) - PS_B_KING = 11 * SQUARE_NB + 1, - PS_END2 = 12 * SQUARE_NB + 1 -}; - -struct ExtPieceSquare { - PieceSquare from[COLOR_NB]; -}; - -// Array for finding the PieceSquare corresponding to the piece on the board -extern ExtPieceSquare kpp_board_index[PIECE_NB]; - -constexpr bool is_ok(PieceId pid); -constexpr Square rotate180(Square sq); - -// Structure holding which tracked piece (PieceId) is where (PieceSquare) -class EvalList { - -public: - // Max. number of pieces without kings is 30 but must be a multiple of 4 in AVX2 - static const int MAX_LENGTH = 32; - - // Array that holds the piece id for the pieces on the board - PieceId piece_id_list[SQUARE_NB]; - - // List of pieces, separate from White and Black POV - PieceSquare* piece_list_fw() const { return const_cast(pieceListFw); } - PieceSquare* piece_list_fb() const { return const_cast(pieceListFb); } - - // Place the piece pc with piece_id on the square sq on the board - void put_piece(PieceId piece_id, Square sq, Piece pc) - { - assert(is_ok(piece_id)); - if (pc != NO_PIECE) - { - pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq); - pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq)); - piece_id_list[sq] = piece_id; - } - else - { - pieceListFw[piece_id] = PS_NONE; - pieceListFb[piece_id] = PS_NONE; - piece_id_list[sq] = piece_id; - } - } - - // Convert the specified piece_id piece to ExtPieceSquare type and return it - ExtPieceSquare piece_with_id(PieceId piece_id) const - { - ExtPieceSquare eps; - eps.from[WHITE] = pieceListFw[piece_id]; - eps.from[BLACK] = pieceListFb[piece_id]; - return eps; - } - -private: - PieceSquare pieceListFw[MAX_LENGTH]; - PieceSquare pieceListFb[MAX_LENGTH]; -}; - -// For differential evaluation of pieces that changed since last turn +// Keep track of what a move changes on the board (used by NNUE) struct DirtyPiece { // Number of changed pieces int dirty_num; - // The ids of changed pieces, max. 2 pieces can change in one move - PieceId pieceId[2]; + // Max 3 pieces can change in one move. A promotion with capture moves + // both the pawn and the captured piece to SQ_NONE and the piece promoted + // to from SQ_NONE to the capture square. + Piece piece[3]; - // What changed from the piece with that piece number - ExtPieceSquare old_piece[2]; - ExtPieceSquare new_piece[2]; + // From and to squares, which may be SQ_NONE + Square from[3]; + Square to[3]; }; /// Score enum stores a middlegame and an endgame value in a single integer (enum). @@ -406,8 +318,6 @@ ENABLE_FULL_OPERATORS_ON(Value) ENABLE_FULL_OPERATORS_ON(Direction) ENABLE_INCR_OPERATORS_ON(Piece) -ENABLE_INCR_OPERATORS_ON(PieceSquare) -ENABLE_INCR_OPERATORS_ON(PieceId) ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(File) @@ -496,10 +406,6 @@ inline Color color_of(Piece pc) { return Color(pc >> 3); } -constexpr bool is_ok(PieceId pid) { - return pid < PIECE_ID_NONE; -} - constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } @@ -536,11 +442,6 @@ constexpr Square to_sq(Move m) { return Square(m & 0x3F); } -// Return relative square when turning the board 180 degrees -constexpr Square rotate180(Square sq) { - return (Square)(sq ^ 0x3F); -} - constexpr int from_to(Move m) { return m & 0xFFF; } diff --git a/src/uci.cpp b/src/uci.cpp index 19981b35..3b4240b4 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -172,7 +172,8 @@ namespace { if (token == "go" || token == "eval") { if (Cluster::is_root()) - cerr << "\nPosition: " << cnt++ << '/' << num << endl; + cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl; + if (token == "go") { go(pos, is, states); @@ -214,7 +215,7 @@ namespace { double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; // Transform eval to centipawns with limited range - double x = Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); + double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); // Return win rate in per mille (rounded to nearest) return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 2b66a475..5e747a7f 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -21,6 +21,7 @@ #include #include +#include "evaluate.h" #include "misc.h" #include "search.h" #include "thread.h" @@ -78,10 +79,8 @@ void init(OptionsMap& o) { o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); - o["Use NNUE"] << Option(false, on_use_NNUE); - // The default must follow the format nn-[SHA256 first 12 digits].nnue - // for the build process (profile-build and fishtest) to work. - o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); + o["Use NNUE"] << Option(true, on_use_NNUE); + o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); }