mirror of https://github.com/sockspls/badfish synced 2025-04-29 16:23:09 +00:00

Merge remote-tracking branch 'upstream/master'

Kate 2024-12-14 11:28:13 +00:00
commit 7924defd98
70 changed files with 6597 additions and 2770 deletions


@ -7,6 +7,7 @@
{ include: [ "<__fwd/sstream.h>", private, "<iosfwd>", public ] }, { include: [ "<__fwd/sstream.h>", private, "<iosfwd>", public ] },
{ include: [ "<__fwd/streambuf.h>", private, "<iosfwd>", public ] }, { include: [ "<__fwd/streambuf.h>", private, "<iosfwd>", public ] },
{ include: [ "<__fwd/string_view.h>", private, "<string_view>", public ] }, { include: [ "<__fwd/string_view.h>", private, "<string_view>", public ] },
{ include: [ "<__system_error/errc.h>", private, "<system_error>", public ] },
# Mappings for includes between public headers # Mappings for includes between public headers
{ include: [ "<ios>", public, "<iostream>", public ] }, { include: [ "<ios>", public, "<iostream>", public ] },


@ -26,6 +26,7 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
persist-credentials: false
- name: Download required linux packages - name: Download required linux packages
if: runner.os == 'Linux' if: runner.os == 'Linux'
@ -91,4 +92,7 @@ jobs:
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
path: . path: |
.
!.git
!.output


@ -11,6 +11,10 @@ on:
paths: paths:
- "**.cpp" - "**.cpp"
- "**.h" - "**.h"
permissions:
pull-requests: write
jobs: jobs:
Clang-Format: Clang-Format:
name: Clang-Format name: Clang-Format
@ -25,7 +29,7 @@ jobs:
id: clang-format id: clang-format
continue-on-error: true continue-on-error: true
with: with:
clang-format-version: "17" clang-format-version: "18"
exclude-regex: "incbin" exclude-regex: "incbin"
- name: Comment on PR - name: Comment on PR
@ -33,12 +37,13 @@ jobs:
uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0 uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0
with: with:
message: | message: |
clang-format 17 needs to be run on this PR. clang-format 18 needs to be run on this PR.
If you do not have clang-format installed, the maintainer will run it when merging. If you do not have clang-format installed, the maintainer will run it when merging.
For the exact version please see https://packages.ubuntu.com/mantic/clang-format-17. For the exact version please see https://packages.ubuntu.com/noble/clang-format-18.
_(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_ _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_
comment_tag: execution comment_tag: execution
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Comment on PR - name: Comment on PR
if: steps.clang-format.outcome != 'failure' if: steps.clang-format.outcome != 'failure'
@ -49,3 +54,4 @@ jobs:
create_if_not_exists: false create_if_not_exists: false
comment_tag: execution comment_tag: execution
mode: delete mode: delete
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}


@ -30,6 +30,8 @@ jobs:
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v4 uses: actions/checkout@v4
with:
persist-credentials: false
# Initializes the CodeQL tools for scanning. # Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL - name: Initialize CodeQL


@ -25,6 +25,8 @@ jobs:
shell: ${{ matrix.config.shell }} shell: ${{ matrix.config.shell }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- name: Install fixed GCC on Linux - name: Install fixed GCC on Linux
if: runner.os == 'Linux' if: runner.os == 'Linux'
@ -86,4 +88,7 @@ jobs:
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
path: . path: |
.
!.git
!.output

.github/workflows/games.yml (new file, 43 lines)

@ -0,0 +1,43 @@
# This workflow will play games with a debug enabled SF using the PR
name: Games
on:
workflow_call:
jobs:
Matetrack:
name: Games
runs-on: ubuntu-22.04
steps:
- name: Checkout SF repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
path: Stockfish
persist-credentials: false
- name: build debug enabled version of SF
working-directory: Stockfish/src
run: make -j build debug=yes
- name: Checkout fast-chess repo
uses: actions/checkout@v4
with:
repository: Disservin/fast-chess
path: fast-chess
ref: d54af1910d5479c669dc731f1f54f9108a251951
persist-credentials: false
- name: fast-chess build
working-directory: fast-chess
run: make -j
- name: Run games
working-directory: fast-chess
run: |
./fast-chess -rounds 4 -games 2 -repeat -concurrency 4 -openings file=app/tests/data/openings.epd format=epd order=random -srand $RANDOM\
-engine name=sf1 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\
-engine name=sf2 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\
-ratinginterval 1 -report penta=true -each proto=uci tc=4+0.04 -log file=fast.log | tee fast.out
cat fast.log
! grep "Assertion" fast.log > /dev/null
! grep "disconnect" fast.out > /dev/null


@ -14,6 +14,7 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
path: Stockfish path: Stockfish
persist-credentials: false
- name: Checkout include-what-you-use - name: Checkout include-what-you-use
uses: actions/checkout@v4 uses: actions/checkout@v4
@ -21,6 +22,7 @@ jobs:
repository: include-what-you-use/include-what-you-use repository: include-what-you-use/include-what-you-use
ref: f25caa280dc3277c4086ec345ad279a2463fea0f ref: f25caa280dc3277c4086ec345ad279a2463fea0f
path: include-what-you-use path: include-what-you-use
persist-credentials: false
- name: Download required linux packages - name: Download required linux packages
run: | run: |

.github/workflows/matetrack.yml (new file, 54 lines)

@ -0,0 +1,54 @@
# This workflow will run matetrack on the PR
name: Matetrack
on:
workflow_call:
jobs:
Matetrack:
name: Matetrack
runs-on: ubuntu-22.04
steps:
- name: Checkout SF repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
path: Stockfish
persist-credentials: false
- name: build SF
working-directory: Stockfish/src
run: make -j profile-build
- name: Checkout matetrack repo
uses: actions/checkout@v4
with:
repository: vondele/matetrack
path: matetrack
ref: 814160f82e6428ed2f6522dc06c2a6fa539cd413
persist-credentials: false
- name: matetrack install deps
working-directory: matetrack
run: pip install -r requirements.txt
- name: cache syzygy
id: cache-syzygy
uses: actions/cache@v4
with:
path: |
matetrack/3-4-5-wdl/
matetrack/3-4-5-dtz/
key: key-syzygy
- name: download syzygy 3-4-5 if needed
working-directory: matetrack
if: steps.cache-syzygy.outputs.cache-hit != 'true'
run: |
wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-wdl/
wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-dtz/
- name: Run matetrack
working-directory: matetrack
run: |
python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --nodes 100000 | tee matecheckout.out
! grep "issues were detected" matecheckout.out > /dev/null


@ -40,6 +40,8 @@ jobs:
shell: ${{ matrix.config.shell }} shell: ${{ matrix.config.shell }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- name: Download required linux packages - name: Download required linux packages
run: | run: |
@ -73,4 +75,4 @@ jobs:
export CXXFLAGS="-O1 -fno-inline" export CXXFLAGS="-O1 -fno-inline"
make clean make clean
make -j4 ARCH=x86-64-sse41-popcnt ${{ matrix.sanitizers.make_option }} debug=yes optimize=no build > /dev/null make -j4 ARCH=x86-64-sse41-popcnt ${{ matrix.sanitizers.make_option }} debug=yes optimize=no build > /dev/null
../tests/instrumented.sh --${{ matrix.sanitizers.instrumented_option }} python3 ../tests/instrumented.py --${{ matrix.sanitizers.instrumented_option }} ./stockfish


@ -15,8 +15,12 @@ jobs:
Prerelease: Prerelease:
if: github.repository == 'official-stockfish/Stockfish' && (github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag')) if: github.repository == 'official-stockfish/Stockfish' && (github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag'))
runs-on: ubuntu-latest runs-on: ubuntu-latest
permissions:
contents: write # For deleting/creating a prerelease
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
# returns null if no pre-release exists # returns null if no pre-release exists
- name: Get Commit SHA of Latest Pre-release - name: Get Commit SHA of Latest Pre-release
@ -66,6 +70,8 @@ jobs:
arm_matrix: ${{ steps.set-arm-matrix.outputs.arm_matrix }} arm_matrix: ${{ steps.set-arm-matrix.outputs.arm_matrix }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- id: set-matrix - id: set-matrix
run: | run: |
TASKS=$(echo $(cat .github/ci/matrix.json) ) TASKS=$(echo $(cat .github/ci/matrix.json) )
@ -90,15 +96,27 @@ jobs:
uses: ./.github/workflows/sanitizers.yml uses: ./.github/workflows/sanitizers.yml
Tests: Tests:
uses: ./.github/workflows/tests.yml uses: ./.github/workflows/tests.yml
Matetrack:
uses: ./.github/workflows/matetrack.yml
Games:
uses: ./.github/workflows/games.yml
Binaries: Binaries:
if: github.repository == 'official-stockfish/Stockfish' if: github.repository == 'official-stockfish/Stockfish'
needs: [Matrix, Prerelease, Compilation] needs: [Matrix, Prerelease, Compilation]
uses: ./.github/workflows/upload_binaries.yml uses: ./.github/workflows/upload_binaries.yml
with: with:
matrix: ${{ needs.Matrix.outputs.matrix }} matrix: ${{ needs.Matrix.outputs.matrix }}
permissions:
contents: write # For deleting/creating a (pre)release
secrets:
token: ${{ secrets.GITHUB_TOKEN }}
ARM_Binaries: ARM_Binaries:
if: github.repository == 'official-stockfish/Stockfish' if: github.repository == 'official-stockfish/Stockfish'
needs: [Matrix, Prerelease, ARMCompilation] needs: [Matrix, Prerelease, ARMCompilation]
uses: ./.github/workflows/upload_binaries.yml uses: ./.github/workflows/upload_binaries.yml
with: with:
matrix: ${{ needs.Matrix.outputs.arm_matrix }} matrix: ${{ needs.Matrix.outputs.arm_matrix }}
permissions:
contents: write # For deleting/creating a (pre)release
secrets:
token: ${{ secrets.GITHUB_TOKEN }}


@ -106,6 +106,7 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
persist-credentials: false
- name: Download required linux packages - name: Download required linux packages
if: runner.os == 'Linux' if: runner.os == 'Linux'
@ -138,16 +139,16 @@ jobs:
- name: Build Docker container - name: Build Docker container
if: matrix.config.base_image if: matrix.config.base_image
run: | run: |
docker buildx build --load -t sf_builder - << EOF docker buildx build --platform ${{ matrix.config.platform }} --load -t sf_builder - << EOF
FROM ${{ matrix.config.base_image }} FROM ${{ matrix.config.base_image }}
WORKDIR /app WORKDIR /app
RUN apk update && apk add make g++ RUN apk update && apk add make g++
CMD ["sh", "script.sh"] CMD ["sh", "src/script.sh"]
EOF EOF
- name: Download required macOS packages - name: Download required macOS packages
if: runner.os == 'macOS' if: runner.os == 'macOS'
run: brew install coreutils run: brew install coreutils gcc@11
- name: Setup msys and install required packages - name: Setup msys and install required packages
if: runner.os == 'Windows' if: runner.os == 'Windows'
@ -175,7 +176,7 @@ jobs:
$COMPCXX -v $COMPCXX -v
else else
echo "$COMPCXX -v" > script.sh echo "$COMPCXX -v" > script.sh
docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder
fi fi
- name: Test help target - name: Test help target
@ -341,8 +342,8 @@ jobs:
- name: Test riscv64 build - name: Test riscv64 build
if: matrix.config.run_riscv64_tests if: matrix.config.run_riscv64_tests
run: | run: |
echo "export LDFLAGS='-static' && make clean && make -j4 ARCH=riscv64 build" > script.sh echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=riscv64 build" > script.sh
docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder
../tests/signature.sh $benchref ../tests/signature.sh $benchref
# ppc64 tests # ppc64 tests
@ -350,8 +351,8 @@ jobs:
- name: Test ppc64 build - name: Test ppc64 build
if: matrix.config.run_ppc64_tests if: matrix.config.run_ppc64_tests
run: | run: |
echo "export LDFLAGS='-static' && make clean && make -j4 ARCH=ppc-64 build" > script.sh echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=ppc-64 build" > script.sh
docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder
../tests/signature.sh $benchref ../tests/signature.sh $benchref
# Other tests # Other tests


@ -5,6 +5,9 @@ on:
matrix: matrix:
type: string type: string
required: true required: true
secrets:
token:
required: true
jobs: jobs:
Artifacts: Artifacts:
@ -25,6 +28,8 @@ jobs:
shell: ${{ matrix.config.shell }} shell: ${{ matrix.config.shell }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- name: Download artifact from compilation - name: Download artifact from compilation
uses: actions/download-artifact@v4 uses: actions/download-artifact@v4
@ -54,6 +59,7 @@ jobs:
mv "${{ matrix.config.simple_name }} ${{ matrix.binaries }}" stockfish-workflow mv "${{ matrix.config.simple_name }} ${{ matrix.binaries }}" stockfish-workflow
cd stockfish-workflow cd stockfish-workflow
cp -r src ../stockfish/ cp -r src ../stockfish/
cp -r scripts ../stockfish/
cp stockfish-$NAME-$BINARY$EXT ../stockfish/ cp stockfish-$NAME-$BINARY$EXT ../stockfish/
cp "Top CPU Contributors.txt" ../stockfish/ cp "Top CPU Contributors.txt" ../stockfish/
cp Copying.txt ../stockfish/ cp Copying.txt ../stockfish/
@ -78,6 +84,7 @@ jobs:
uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981 uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981
with: with:
files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }}
token: ${{ secrets.token }}
- name: Get last commit sha - name: Get last commit sha
id: last_commit id: last_commit
@ -104,3 +111,4 @@ jobs:
tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
prerelease: true prerelease: true
files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }}
token: ${{ secrets.token }}

.gitignore (5 lines changed)

@ -10,3 +10,8 @@ src/-lstdc++.res
# Neural network for the NNUE evaluation # Neural network for the NNUE evaluation
**/*.nnue **/*.nnue
# Files generated by the instrumented tests
tsan.supp
__pycache__/
tests/syzygy
tests/bench_tmp.epd


@ -20,6 +20,7 @@ Alexander Kure
Alexander Pagel (Lolligerhans) Alexander Pagel (Lolligerhans)
Alfredo Menezes (lonfom169) Alfredo Menezes (lonfom169)
Ali AlZhrani (Cooffe) Ali AlZhrani (Cooffe)
Andreas Jan van der Meulen (Andyson007)
Andreas Matthies (Matthies) Andreas Matthies (Matthies)
Andrei Vetrov (proukornew) Andrei Vetrov (proukornew)
Andrew Grant (AndyGrant) Andrew Grant (AndyGrant)
@ -44,6 +45,7 @@ Bruno de Melo Costa (BM123499)
Bruno Pellanda (pellanda) Bruno Pellanda (pellanda)
Bryan Cross (crossbr) Bryan Cross (crossbr)
candirufish candirufish
Carlos Esparza Sánchez (ces42)
Chess13234 Chess13234
Chris Cain (ceebo) Chris Cain (ceebo)
Ciekce Ciekce
@ -68,9 +70,11 @@ Douglas Matos Gomes (dsmsgms)
Dubslow Dubslow
Eduardo Cáceres (eduherminio) Eduardo Cáceres (eduherminio)
Eelco de Groot (KingDefender) Eelco de Groot (KingDefender)
Ehsan Rashid (erashid)
Elvin Liu (solarlight2) Elvin Liu (solarlight2)
erbsenzaehler erbsenzaehler
Ernesto Gatti Ernesto Gatti
evqsx
Fabian Beuke (madnight) Fabian Beuke (madnight)
Fabian Fichter (ianfab) Fabian Fichter (ianfab)
Fanael Linithien (Fanael) Fanael Linithien (Fanael)
@ -127,6 +131,7 @@ Kojirion
Krystian Kuzniarek (kuzkry) Krystian Kuzniarek (kuzkry)
Leonardo Ljubičić (ICCF World Champion) Leonardo Ljubičić (ICCF World Champion)
Leonid Pechenik (lp--) Leonid Pechenik (lp--)
Li Ying (yl25946)
Liam Keegan (lkeegan) Liam Keegan (lkeegan)
Linmiao Xu (linrock) Linmiao Xu (linrock)
Linus Arver (listx) Linus Arver (listx)
@ -139,6 +144,7 @@ Maciej Żenczykowski (zenczykowski)
Malcolm Campbell (xoto10) Malcolm Campbell (xoto10)
Mark Tenzer (31m059) Mark Tenzer (31m059)
marotear marotear
Mathias Parnaudeau (mparnaudeau)
Matt Ginsberg (mattginsberg) Matt Ginsberg (mattginsberg)
Matthew Lai (matthewlai) Matthew Lai (matthewlai)
Matthew Sullivan (Matt14916) Matthew Sullivan (Matt14916)
@ -167,10 +173,12 @@ Niklas Fiekas (niklasf)
Nikolay Kostov (NikolayIT) Nikolay Kostov (NikolayIT)
Norman Schmidt (FireFather) Norman Schmidt (FireFather)
notruck notruck
Nour Berakdar (Nonlinear)
Ofek Shochat (OfekShochat, ghostway) Ofek Shochat (OfekShochat, ghostway)
Ondrej Mosnáček (WOnder93) Ondrej Mosnáček (WOnder93)
Ondřej Mišina (AndrovT) Ondřej Mišina (AndrovT)
Oskar Werkelin Ahlin Oskar Werkelin Ahlin
Ömer Faruk Tutkun (OmerFarukTutkun)
Pablo Vazquez Pablo Vazquez
Panthee Panthee
Pascal Romaret Pascal Romaret
@ -232,6 +240,7 @@ Unai Corzo (unaiic)
Uri Blass (uriblass) Uri Blass (uriblass)
Vince Negri (cuddlestmonkey) Vince Negri (cuddlestmonkey)
Viren Viren
Wencey Wang
windfishballad windfishballad
xefoci7612 xefoci7612
Xiang Wang (KatyushaScarlet) Xiang Wang (KatyushaScarlet)


@ -59,7 +59,7 @@ discussion._
Changes to Stockfish C++ code should respect our coding style defined by Changes to Stockfish C++ code should respect our coding style defined by
[.clang-format](.clang-format). You can format your changes by running [.clang-format](.clang-format). You can format your changes by running
`make format`. This requires clang-format version 17 to be installed on your system. `make format`. This requires clang-format version 18 to be installed on your system.
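As a concrete illustration, a minimal sketch of running this step locally (assuming the Makefile shown later in this diff, whose `format` target invokes clang-format on the source and header lists and prefers a `clang-format-18` binary when one is on PATH):

```
cd src
make format   # runs: $(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file
```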
## Navigate ## Navigate


@ -99,7 +99,7 @@ descriptions. An example suitable for most Intel and AMD chips:
``` ```
cd src cd src
make -j profile-build ARCH=x86-64-avx2 make -j profile-build
``` ```
Detailed compilation instructions for all platforms can be found in our Detailed compilation instructions for all platforms can be found in our
@ -122,6 +122,11 @@ where the source code can be found) to generate the exact binary you are
distributing. If you make any changes to the source code, these changes must distributing. If you make any changes to the source code, these changes must
also be made available under GPL v3. also be made available under GPL v3.
## Acknowledgements
Stockfish uses neural networks trained on [data provided by the Leela Chess Zero
project][lc0-data-link], which is made available under the [Open Database License][odbl-link] (ODbL).
[authors-link]: https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS [authors-link]: https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS
[build-link]: https://github.com/official-stockfish/Stockfish/actions/workflows/stockfish.yml [build-link]: https://github.com/official-stockfish/Stockfish/actions/workflows/stockfish.yml
@ -146,6 +151,8 @@ also be made available under GPL v3.
[wiki-uci-link]: https://github.com/official-stockfish/Stockfish/wiki/UCI-&-Commands [wiki-uci-link]: https://github.com/official-stockfish/Stockfish/wiki/UCI-&-Commands
[wiki-usage-link]: https://github.com/official-stockfish/Stockfish/wiki/Download-and-usage [wiki-usage-link]: https://github.com/official-stockfish/Stockfish/wiki/Download-and-usage
[worker-link]: https://github.com/official-stockfish/fishtest/wiki/Running-the-worker [worker-link]: https://github.com/official-stockfish/fishtest/wiki/Running-the-worker
[lc0-data-link]: https://storage.lczero.org/files/training_data
[odbl-link]: https://opendatacommons.org/licenses/odbl/odbl-10.txt
[build-badge]: https://img.shields.io/github/actions/workflow/status/official-stockfish/Stockfish/stockfish.yml?branch=master&style=for-the-badge&label=stockfish&logo=github [build-badge]: https://img.shields.io/github/actions/workflow/status/official-stockfish/Stockfish/stockfish.yml?branch=master&style=for-the-badge&label=stockfish&logo=github
[commits-badge]: https://img.shields.io/github/commits-since/official-stockfish/Stockfish/latest?style=for-the-badge [commits-badge]: https://img.shields.io/github/commits-since/official-stockfish/Stockfish/latest?style=for-the-badge


@ -1,106 +1,109 @@
Contributors to Fishtest with >10,000 CPU hours, as of 2024-02-24. Contributors to Fishtest with >10,000 CPU hours, as of 2024-08-31.
Thank you! Thank you!
Username CPU Hours Games played Username CPU Hours Games played
------------------------------------------------------------------ ------------------------------------------------------------------
noobpwnftw 39302472 3055513453 noobpwnftw 40428649 3164740143
technologov 20845762 994893444 technologov 23581394 1076895482
linrock 8616428 560281417 vdv 19425375 718302718
linrock 10034115 643194527
mlang 3026000 200065824 mlang 3026000 200065824
okrout 2332151 222639518 okrout 2572676 237511408
pemo 1800019 60274069 pemo 1836785 62226157
dew 1689162 100033738 dew 1689162 100033738
TueRens 1474943 75121774 TueRens 1648780 77891164
grandphish2 1463002 91616949 sebastronomy 1468328 60859092
JojoM 1109702 72927902 grandphish2 1466110 91776075
olafm 978631 71037944 JojoM 1130625 73666098
sebastronomy 939955 44920556 olafm 1067009 74807270
tvijlbrief 796125 51897690 tvijlbrief 796125 51897690
gvreuls 711320 49142318 oz 781847 53910686
rpngn 768460 49812975
gvreuls 751085 52177668
mibere 703840 46867607 mibere 703840 46867607
oz 646268 46293638 leszek 566598 42024615
rpngn 572571 38928563 cw 519601 34988161
leszek 531858 39316505
cw 518116 34894291
fastgm 503862 30260818 fastgm 503862 30260818
CSU_Dynasty 468784 31385034 CSU_Dynasty 468784 31385034
ctoks 434591 28520597 maximmasiutin 439192 27893522
maximmasiutin 429983 27066286 ctoks 435148 28541909
crunchy 427414 27371625 crunchy 427414 27371625
bcross 415724 29061187 bcross 415724 29061187
robal 371112 24642270
mgrabiak 367963 26464704
velislav 342588 22140902 velislav 342588 22140902
mgrabiak 338763 23999170 ncfish1 329039 20624527
Fisherman 327231 21829379 Fisherman 327231 21829379
robal 299836 20213182
Dantist 296386 18031762 Dantist 296386 18031762
ncfish1 267604 17881149 tolkki963 262050 22049676
Sylvain27 255595 8864404
nordlandia 249322 16420192 nordlandia 249322 16420192
Fifis 237657 13065577
marrco 234581 17714473 marrco 234581 17714473
tolkki963 233490 19773930 Calis007 217537 14450582
glinscott 208125 13277240 glinscott 208125 13277240
drabel 204167 13930674 drabel 204167 13930674
mhoram 202894 12601997 mhoram 202894 12601997
bking_US 198894 11876016 bking_US 198894 11876016
Calis007 188631 12795784
Thanar 179852 12365359 Thanar 179852 12365359
Fifis 176209 10638245 javran 169679 13481966
vdv 175544 9904472 armo9494 162863 10937118
spams 157128 10319326 spams 157128 10319326
DesolatedDodo 156659 10210328 DesolatedDodo 156683 10211206
armo9494 155355 10566898 Wencey 152308 8375444
sqrt2 147963 9724586 sqrt2 147963 9724586
vdbergh 140311 9225125
jcAEie 140086 10603658 jcAEie 140086 10603658
vdbergh 139746 9172061
CoffeeOne 137100 5024116 CoffeeOne 137100 5024116
malala 136182 8002293 malala 136182 8002293
xoto 133759 9159372 xoto 133759 9159372
Dubslow 129614 8519312
davar 129023 8376525 davar 129023 8376525
DMBK 122960 8980062 DMBK 122960 8980062
dsmith 122059 7570238 dsmith 122059 7570238
javran 121564 10144656 CypressChess 120784 8672620
sschnee 120526 7547722
maposora 119734 10749710
amicic 119661 7938029 amicic 119661 7938029
sschnee 118107 7389266 Wolfgang 115713 8159062
Wolfgang 114616 8070494
Data 113305 8220352 Data 113305 8220352
BrunoBanani 112960 7436849 BrunoBanani 112960 7436849
Wencey 111502 5991676 markkulix 112897 9133168
cuistot 108503 7006992 cuistot 109802 7121030
CypressChess 108331 7759788
skiminki 107583 7218170 skiminki 107583 7218170
sterni1971 104431 5938282
MaZePallas 102823 6633619 MaZePallas 102823 6633619
sterni1971 100532 5880772
sunu 100167 7040199 sunu 100167 7040199
zeryl 99331 6221261 zeryl 99331 6221261
thirdlife 99156 2245320 thirdlife 99156 2245320
ElbertoOne 99028 7023771 ElbertoOne 99028 7023771
Dubslow 98600 6903242 megaman7de 98456 6675076
markkulix 97010 7643900 Goatminola 96765 8257832
bigpen0r 94809 6529203 bigpen0r 94825 6529241
brabos 92118 6186135 brabos 92118 6186135
Maxim 90818 3283364 Maxim 90818 3283364
psk 89957 5984901 psk 89957 5984901
megaman7de 88822 6052132
racerschmacer 85805 6122790 racerschmacer 85805 6122790
maposora 85710 7778146
Vizvezdenec 83761 5344740 Vizvezdenec 83761 5344740
0x3C33 82614 5271253 0x3C33 82614 5271253
szupaw 82495 7151686
BRAVONE 81239 5054681 BRAVONE 81239 5054681
nssy 76497 5259388 nssy 76497 5259388
cody 76126 4492126
jromang 76106 5236025 jromang 76106 5236025
MarcusTullius 76103 5061991
woutboat 76072 6022922
Spprtr 75977 5252287
teddybaer 75125 5407666 teddybaer 75125 5407666
Pking_cda 73776 5293873 Pking_cda 73776 5293873
yurikvelo 73516 5036928 yurikvelo 73611 5046822
MarcusTullius 71053 4803477 Mineta 71130 4711422
Bobo1239 70579 4794999 Bobo1239 70579 4794999
solarlight 70517 5028306 solarlight 70517 5028306
dv8silencer 70287 3883992 dv8silencer 70287 3883992
Spprtr 69646 4806763
Mineta 66325 4537742
manap 66273 4121774 manap 66273 4121774
szupaw 65468 5669742
tinker 64333 4268790 tinker 64333 4268790
qurashee 61208 3429862 qurashee 61208 3429862
woutboat 59496 4906352
AGI 58195 4329580 AGI 58195 4329580
robnjr 57262 4053117 robnjr 57262 4053117
Freja 56938 3733019 Freja 56938 3733019
@ -108,39 +111,45 @@ MaxKlaxxMiner 56879 3423958
ttruscott 56010 3680085 ttruscott 56010 3680085
rkl 55132 4164467 rkl 55132 4164467
jmdana 54697 4012593 jmdana 54697 4012593
notchris 53936 4184018
renouve 53811 3501516 renouve 53811 3501516
notchris 52433 4044590
finfish 51360 3370515 finfish 51360 3370515
eva42 51272 3599691 eva42 51272 3599691
eastorwest 51117 3454811 eastorwest 51117 3454811
Goatminola 51004 4432492
rap 49985 3219146 rap 49985 3219146
pb00067 49733 3298934 pb00067 49733 3298934
GPUex 48686 3684998 GPUex 48686 3684998
OuaisBla 48626 3445134 OuaisBla 48626 3445134
ronaldjerum 47654 3240695 ronaldjerum 47654 3240695
biffhero 46564 3111352 biffhero 46564 3111352
oryx 45533 3539290 oryx 45639 3546530
VoyagerOne 45476 3452465 VoyagerOne 45476 3452465
speedycpu 43842 3003273 speedycpu 43842 3003273
jbwiebe 43305 2805433 jbwiebe 43305 2805433
Antihistamine 41788 2761312 Antihistamine 41788 2761312
mhunt 41735 2691355 mhunt 41735 2691355
jibarbosa 41640 4145702
homyur 39893 2850481 homyur 39893 2850481
gri 39871 2515779 gri 39871 2515779
DeepnessFulled 39020 3323102
Garf 37741 2999686 Garf 37741 2999686
SC 37299 2731694 SC 37299 2731694
Sylvain27 36520 1467082 Gaster319 37118 3279678
naclosagc 36562 1279618
csnodgrass 36207 2688994 csnodgrass 36207 2688994
Gaster319 35655 3149442
strelock 34716 2074055 strelock 34716 2074055
gopeto 33717 2245606
EthanOConnor 33370 2090311 EthanOConnor 33370 2090311
slakovv 32915 2021889 slakovv 32915 2021889
gopeto 31884 2076712 jojo2357 32890 2826662
shawnxu 32019 2802552
Gelma 31771 1551204 Gelma 31771 1551204
vidar808 31560 1351810
kdave 31157 2198362 kdave 31157 2198362
manapbk 30987 1810399 manapbk 30987 1810399
ZacHFX 30551 2238078 ZacHFX 30966 2272416
TataneSan 30713 1513402
votoanthuan 30691 2460856
Prcuvu 30377 2170122 Prcuvu 30377 2170122
anst 30301 2190091 anst 30301 2190091
jkiiski 30136 1904470 jkiiski 30136 1904470
@ -149,14 +158,15 @@ hyperbolic.tom 29840 2017394
chuckstablers 29659 2093438 chuckstablers 29659 2093438
Pyafue 29650 1902349 Pyafue 29650 1902349
belzedar94 28846 1811530 belzedar94 28846 1811530
votoanthuan 27978 2285818 mecevdimitar 27610 1721382
shawnxu 27438 2465810
chriswk 26902 1868317 chriswk 26902 1868317
xwziegtm 26897 2124586 xwziegtm 26897 2124586
achambord 26582 1767323 achambord 26582 1767323
somethingintheshadows 26496 2186404
Patrick_G 26276 1801617 Patrick_G 26276 1801617
yorkman 26193 1992080 yorkman 26193 1992080
Ulysses 25397 1701264 srowen 25743 1490684
Ulysses 25413 1702830
Jopo12321 25227 1652482 Jopo12321 25227 1652482
SFTUser 25182 1675689 SFTUser 25182 1675689
nabildanial 25068 1531665 nabildanial 25068 1531665
@ -164,66 +174,69 @@ Sharaf_DG 24765 1786697
rodneyc 24376 1416402 rodneyc 24376 1416402
jsys14 24297 1721230 jsys14 24297 1721230
agg177 23890 1395014 agg177 23890 1395014
srowen 23842 1342508 AndreasKrug 23754 1890115
Ente 23752 1678188 Ente 23752 1678188
jojo2357 23479 2061238
JanErik 23408 1703875 JanErik 23408 1703875
Isidor 23388 1680691 Isidor 23388 1680691
Norabor 23371 1603244 Norabor 23371 1603244
WoodMan777 23253 2023048
Nullvalue 23155 2022752
cisco2015 22920 1763301 cisco2015 22920 1763301
Zirie 22542 1472937 Zirie 22542 1472937
Nullvalue 22490 1970374
AndreasKrug 22485 1769491
team-oh 22272 1636708 team-oh 22272 1636708
Roady 22220 1465606 Roady 22220 1465606
MazeOfGalious 21978 1629593 MazeOfGalious 21978 1629593
sg4032 21947 1643353 sg4032 21950 1643373
tsim67 21747 1330880
ianh2105 21725 1632562 ianh2105 21725 1632562
Skiff84 21711 1014212
xor12 21628 1680365 xor12 21628 1680365
dex 21612 1467203 dex 21612 1467203
nesoneg 21494 1463031 nesoneg 21494 1463031
user213718 21454 1404128 user213718 21454 1404128
Serpensin 21452 1790510
sphinx 21211 1384728 sphinx 21211 1384728
qoo_charly_cai 21135 1514907 qoo_charly_cai 21136 1514927
IslandLambda 21062 1220838
jjoshua2 21001 1423089 jjoshua2 21001 1423089
Zake9298 20938 1565848 Zake9298 20938 1565848
horst.prack 20878 1465656 horst.prack 20878 1465656
fishtester 20729 1348888
0xB00B1ES 20590 1208666 0xB00B1ES 20590 1208666
Serpensin 20487 1729674 ols 20477 1195945
Dinde 20440 1292390 Dinde 20459 1292774
j3corre 20405 941444 j3corre 20405 941444
Adrian.Schmidt123 20316 1281436 Adrian.Schmidt123 20316 1281436
wei 19973 1745989 wei 19973 1745989
fishtester 19617 1257388 teenychess 19819 1762006
rstoesser 19569 1293588 rstoesser 19569 1293588
eudhan 19274 1283717 eudhan 19274 1283717
vulcan 18871 1729392 vulcan 18871 1729392
wizardassassin 18795 1376884
Karpovbot 18766 1053178 Karpovbot 18766 1053178
WoodMan777 18556 1628264
jundery 18445 1115855 jundery 18445 1115855
mkstockfishtester 18350 1690676
ville 17883 1384026 ville 17883 1384026
chris 17698 1487385 chris 17698 1487385
purplefishies 17595 1092533 purplefishies 17595 1092533
dju 17414 981289 dju 17414 981289
ols 17291 1042003
iisiraider 17275 1049015 iisiraider 17275 1049015
Skiff84 17111 950248
DragonLord 17014 1162790 DragonLord 17014 1162790
Karby 17008 1013160
pirt 16965 1271519
redstone59 16842 1461780 redstone59 16842 1461780
Karby 16839 1010124
Alb11747 16787 1213990 Alb11747 16787 1213990
pirt 16493 1237199
Naven94 16414 951718 Naven94 16414 951718
wizardassassin 16392 1148672 scuzzi 16115 994341
IgorLeMasson 16064 1147232 IgorLeMasson 16064 1147232
scuzzi 15757 968735
ako027ako 15671 1173203 ako027ako 15671 1173203
infinigon 15285 965966
Nikolay.IT 15154 1068349 Nikolay.IT 15154 1068349
Andrew Grant 15114 895539 Andrew Grant 15114 895539
OssumOpossum 14857 1007129 OssumOpossum 14857 1007129
LunaticBFF57 14525 1190310 LunaticBFF57 14525 1190310
enedene 14476 905279 enedene 14476 905279
IslandLambda 14393 958196 Hjax 14394 1005013
bpfliegel 14233 882523 bpfliegel 14233 882523
YELNAMRON 14230 1128094 YELNAMRON 14230 1128094
mpx86 14019 759568 mpx86 14019 759568
@ -233,54 +246,56 @@ Nesa92 13806 1116101
crocogoat 13803 1117422 crocogoat 13803 1117422
joster 13710 946160 joster 13710 946160
mbeier 13650 1044928 mbeier 13650 1044928
Hjax 13535 915487 Pablohn26 13552 1088532
wxt9861 13550 1312306
Dark_wizzie 13422 1007152 Dark_wizzie 13422 1007152
Rudolphous 13244 883140 Rudolphous 13244 883140
Machariel 13010 863104 Machariel 13010 863104
infinigon 12991 943216 nalanzeyu 12996 232590
mabichito 12903 749391 mabichito 12903 749391
Jackfish 12895 868928
thijsk 12886 722107 thijsk 12886 722107
AdrianSA 12860 804972 AdrianSA 12860 804972
Flopzee 12698 894821 Flopzee 12698 894821
whelanh 12682 266404
mschmidt 12644 863193 mschmidt 12644 863193
korposzczur 12606 838168 korposzczur 12606 838168
tsim67 12570 890180
Jackfish 12553 836958
fatmurphy 12547 853210 fatmurphy 12547 853210
Oakwen 12503 853105 Oakwen 12532 855759
icewulf 12447 854878
SapphireBrand 12416 969604 SapphireBrand 12416 969604
deflectooor 12386 579392 deflectooor 12386 579392
modolief 12386 896470 modolief 12386 896470
TataneSan 12358 609332
Farseer 12249 694108 Farseer 12249 694108
Hongildong 12201 648712
pgontarz 12151 848794 pgontarz 12151 848794
dbernier 12103 860824 dbernier 12103 860824
FormazChar 11989 907809 szczur90 12035 942376
FormazChar 12019 910409
rensonthemove 11999 971993
stocky 11954 699440 stocky 11954 699440
somethingintheshadows 11940 989472 MooTheCow 11923 779432
MooTheCow 11892 776126
3cho 11842 1036786 3cho 11842 1036786
whelanh 11557 245188 ckaz 11792 732276
infinity 11470 727027 infinity 11470 727027
aga 11412 695127 aga 11412 695127
torbjo 11395 729145 torbjo 11395 729145
Thomas A. Anderson 11372 732094 Thomas A. Anderson 11372 732094
savage84 11358 670860 savage84 11358 670860
Def9Infinity 11345 696552
d64 11263 789184 d64 11263 789184
ali-al-zhrani 11245 779246 ali-al-zhrani 11245 779246
ckaz 11170 680866 ImperiumAeternum 11155 952000
snicolet 11106 869170 snicolet 11106 869170
dapper 11032 771402 dapper 11032 771402
Ethnikoi 10993 945906 Ethnikoi 10993 945906
Snuuka 10938 435504 Snuuka 10938 435504
Karmatron 10859 678058 Karmatron 10871 678306
basepi 10637 744851 basepi 10637 744851
jibarbosa 10628 857100
Cubox 10621 826448 Cubox 10621 826448
mecevdimitar 10609 787318 gerbil 10519 971688
michaelrpg 10509 739239 michaelrpg 10509 739239
Def9Infinity 10427 686978
OIVAS7572 10420 995586 OIVAS7572 10420 995586
wxt9861 10412 1013864
Garruk 10365 706465 Garruk 10365 706465
dzjp 10343 732529 dzjp 10343 732529
RickGroszkiewicz 10263 990798


@ -26,6 +26,17 @@ check_znver_1_2() {
[ "$vendor_id" = "AuthenticAMD" ] && [ "$cpu_family" = "23" ] && znver_1_2=true [ "$vendor_id" = "AuthenticAMD" ] && [ "$cpu_family" = "23" ] && znver_1_2=true
} }
# Set the file CPU loongarch64 architecture
set_arch_loongarch64() {
if check_flags 'lasx'; then
true_arch='loongarch64-lasx'
elif check_flags 'lsx'; then
true_arch='loongarch64-lsx'
else
true_arch='loongarch64'
fi
}
# Set the file CPU x86_64 architecture # Set the file CPU x86_64 architecture
set_arch_x86_64() { set_arch_x86_64() {
if check_flags 'avx512vnni' 'avx512dq' 'avx512f' 'avx512bw' 'avx512vl'; then if check_flags 'avx512vnni' 'avx512dq' 'avx512f' 'avx512bw' 'avx512vl'; then
@ -43,6 +54,20 @@ set_arch_x86_64() {
fi fi
} }
set_arch_ppc_64() {
if $(grep -q -w "altivec" /proc/cpuinfo); then
power=$(grep -oP -m 1 'cpu\t+: POWER\K\d+' /proc/cpuinfo)
if [ "0$power" -gt 7 ]; then
# VSX started with POWER8
true_arch='ppc-64-vsx'
else
true_arch='ppc-64-altivec'
fi
else
true_arch='ppc-64'
fi
}
# Check the system type # Check the system type
uname_s=$(uname -s) uname_s=$(uname -s)
uname_m=$(uname -m) uname_m=$(uname -m)
@ -76,6 +101,10 @@ case $uname_s in
file_os='ubuntu' file_os='ubuntu'
true_arch='x86-32' true_arch='x86-32'
;; ;;
'ppc64'*)
file_os='ubuntu'
set_arch_ppc_64
;;
'aarch64') 'aarch64')
file_os='android' file_os='android'
true_arch='armv8' true_arch='armv8'
@ -90,6 +119,10 @@ case $uname_s in
true_arch="$true_arch-neon" true_arch="$true_arch-neon"
fi fi
;; ;;
'loongarch64'*)
file_os='linux'
set_arch_loongarch64
;;
*) # Unsupported machine type, exit with error *) # Unsupported machine type, exit with error
printf 'Unsupported machine type: %s\n' "$uname_m" printf 'Unsupported machine type: %s\n' "$uname_m"
exit 1 exit 1
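For illustration, a small sketch of the feature-flag detection these new helpers rely on, assuming `check_flags` (defined elsewhere in this script, not shown in the diff) tests CPU feature flags from /proc/cpuinfo:

```
# illustrative only: the mapping applied by set_arch_loongarch64 above
grep -qw lasx /proc/cpuinfo && echo "lasx present -> loongarch64-lasx"
grep -qw lsx  /proc/cpuinfo && echo "lsx present  -> loongarch64-lsx"
# similarly, set_arch_ppc_64 picks ppc-64-vsx on POWER8 or newer with altivec,
# ppc-64-altivec on older altivec-capable CPUs, and plain ppc-64 otherwise
```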

scripts/net.sh (new executable file, 75 lines)

@ -0,0 +1,75 @@
#!/bin/sh
wget_or_curl=$( (command -v wget > /dev/null 2>&1 && echo "wget -qO-") || \
(command -v curl > /dev/null 2>&1 && echo "curl -skL"))
if [ -z "$wget_or_curl" ]; then
>&2 printf "%s\n" "Neither wget or curl is installed." \
"Install one of these tools to download NNUE files automatically."
exit 1
fi
sha256sum=$( (command -v shasum > /dev/null 2>&1 && echo "shasum -a 256") || \
(command -v sha256sum > /dev/null 2>&1 && echo "sha256sum"))
if [ -z "$sha256sum" ]; then
>&2 echo "sha256sum not found, NNUE files will be assumed valid."
fi
get_nnue_filename() {
grep "$1" evaluate.h | grep "#define" | sed "s/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/"
}
validate_network() {
# If no sha256sum command is available, assume the file is always valid.
if [ -n "$sha256sum" ] && [ -f "$1" ]; then
if [ "$1" != "nn-$($sha256sum "$1" | cut -c 1-12).nnue" ]; then
rm -f "$1"
return 1
fi
fi
}
fetch_network() {
_filename="$(get_nnue_filename "$1")"
if [ -z "$_filename" ]; then
>&2 echo "NNUE file name not found for: $1"
return 1
fi
if [ -f "$_filename" ]; then
if validate_network "$_filename"; then
echo "Existing $_filename validated, skipping download"
return
else
echo "Removing invalid NNUE file: $_filename"
fi
fi
for url in \
"https://tests.stockfishchess.org/api/nn/$_filename" \
"https://github.com/official-stockfish/networks/raw/master/$_filename"; do
echo "Downloading from $url ..."
if $wget_or_curl "$url" > "$_filename"; then
if validate_network "$_filename"; then
echo "Successfully validated $_filename"
else
echo "Downloaded $_filename is invalid"
continue
fi
else
echo "Failed to download from $url"
fi
if [ -f "$_filename" ]; then
return
fi
done
# Download was not successful in the loop, return false.
>&2 echo "Failed to download $_filename"
return 1
}
fetch_network EvalFileDefaultNameBig && \
fetch_network EvalFileDefaultNameSmall
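A brief usage sketch for the new script (assuming the standard src/ layout; as shown in the Makefile change below, `make net` now simply delegates to it):

```
# fetch and validate the default big and small NNUE nets
cd src && sh ../scripts/net.sh

# the rule applied by validate_network: a file named nn-<12 hex chars>.nnue must
# hash to those same 12 leading characters, e.g. for a hypothetical net file
#   shasum -a 256 nn-abcdef123456.nnue | cut -c 1-12   # should print abcdef123456
```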


@ -55,15 +55,15 @@ PGOBENCH = $(WINE_PATH) ./$(EXE) bench
SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \ SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \
misc.cpp movegen.cpp movepick.cpp position.cpp \ misc.cpp movegen.cpp movepick.cpp position.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp memory.cpp
HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \ HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h history.h \
nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \ nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \
nnue/layers/affine_transform_sparse_input.h nnue/layers/clipped_relu.h nnue/layers/simd.h \ nnue/layers/affine_transform_sparse_input.h nnue/layers/clipped_relu.h nnue/layers/simd.h \
nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \ nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \
nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \ nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \
search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \ search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \
tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h memory.h
OBJS = $(notdir $(SRCS:.cpp=.o)) OBJS = $(notdir $(SRCS:.cpp=.o))
@ -98,8 +98,12 @@ VPATH = syzygy:nnue:nnue/features
# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512
# vnni256 = yes/no --- -mavx256vnni --- Use Intel Vector Neural Network Instructions 512 with 256bit operands # vnni256 = yes/no --- -mavx256vnni --- Use Intel Vector Neural Network Instructions 512 with 256bit operands
# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 # vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512
# altivec = yes/no --- -maltivec --- Use PowerPC Altivec SIMD extension
# vsx = yes/no --- -mvsx --- Use POWER VSX SIMD extension
# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture
# dotprod = yes/no --- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions # dotprod = yes/no --- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions
# lsx = yes/no --- -mlsx --- Use Loongson SIMD eXtension
# lasx = yes/no --- -mlasx --- use Loongson Advanced SIMD eXtension
# #
# Note that Makefile is space sensitive, so when adding new architectures # Note that Makefile is space sensitive, so when adding new architectures
# or modifying existing flags, you have to make sure there are no extra spaces # or modifying existing flags, you have to make sure there are no extra spaces
@ -124,8 +128,9 @@ endif
ifeq ($(ARCH), $(filter $(ARCH), \ ifeq ($(ARCH), $(filter $(ARCH), \
x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-avxvnni x86-64-bmi2 \ x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-avxvnni x86-64-bmi2 \
x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 e2k \ x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-64-altivec ppc-64-vsx ppc-32 e2k \
armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64 loongarch64)) armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64 \
loongarch64 loongarch64-lsx loongarch64-lasx))
SUPPORTED_ARCH=true SUPPORTED_ARCH=true
else else
SUPPORTED_ARCH=false SUPPORTED_ARCH=false
@ -148,13 +153,17 @@ avxvnni = no
avx512 = no avx512 = no
vnni256 = no vnni256 = no
vnni512 = no vnni512 = no
altivec = no
vsx = no
neon = no neon = no
dotprod = no dotprod = no
arm_version = 0 arm_version = 0
lsx = no
lasx = no
STRIP = strip STRIP = strip
ifneq ($(shell which clang-format-17 2> /dev/null),) ifneq ($(shell which clang-format-18 2> /dev/null),)
CLANG-FORMAT = clang-format-17 CLANG-FORMAT = clang-format-18
else else
CLANG-FORMAT = clang-format CLANG-FORMAT = clang-format
endif endif
@ -355,6 +364,20 @@ ifeq ($(ARCH),ppc-64)
prefetch = yes prefetch = yes
endif endif
ifeq ($(ARCH),ppc-64-altivec)
arch = ppc64
popcnt = yes
prefetch = yes
altivec = yes
endif
ifeq ($(ARCH),ppc-64-vsx)
arch = ppc64
popcnt = yes
prefetch = yes
vsx = yes
endif
ifeq ($(findstring e2k,$(ARCH)),e2k) ifeq ($(findstring e2k,$(ARCH)),e2k)
arch = e2k arch = e2k
mmx = yes mmx = yes
@ -370,8 +393,19 @@ ifeq ($(ARCH),riscv64)
arch = riscv64 arch = riscv64
endif endif
ifeq ($(ARCH),loongarch64) ifeq ($(findstring loongarch64,$(ARCH)),loongarch64)
arch = loongarch64 arch = loongarch64
prefetch = yes
ifeq ($(findstring -lasx,$(ARCH)),-lasx)
lsx = yes
lasx = yes
endif
ifeq ($(findstring -lsx,$(ARCH)),-lsx)
lsx = yes
endif
endif endif
endif endif
@ -408,7 +442,7 @@ ifeq ($(COMP),gcc)
ifeq ($(ARCH),riscv64) ifeq ($(ARCH),riscv64)
CXXFLAGS += -latomic CXXFLAGS += -latomic
endif endif
else ifeq ($(ARCH),loongarch64) else ifeq ($(arch),loongarch64)
CXXFLAGS += -latomic CXXFLAGS += -latomic
else else
CXXFLAGS += -m$(bits) CXXFLAGS += -m$(bits)
@ -480,7 +514,7 @@ ifeq ($(COMP),clang)
ifeq ($(ARCH),riscv64) ifeq ($(ARCH),riscv64)
CXXFLAGS += -latomic CXXFLAGS += -latomic
endif endif
else ifeq ($(ARCH),loongarch64) else ifeq ($(arch),loongarch64)
CXXFLAGS += -latomic CXXFLAGS += -latomic
else else
CXXFLAGS += -m$(bits) CXXFLAGS += -m$(bits)
@ -489,8 +523,8 @@ ifeq ($(COMP),clang)
endif endif
ifeq ($(KERNEL),Darwin) ifeq ($(KERNEL),Darwin)
CXXFLAGS += -mmacosx-version-min=10.14 CXXFLAGS += -mmacosx-version-min=10.15
LDFLAGS += -mmacosx-version-min=10.14 LDFLAGS += -mmacosx-version-min=10.15
ifneq ($(arch),any) ifneq ($(arch),any)
CXXFLAGS += -arch $(arch) CXXFLAGS += -arch $(arch)
LDFLAGS += -arch $(arch) LDFLAGS += -arch $(arch)
@ -634,7 +668,7 @@ else
endif endif
ifeq ($(popcnt),yes) ifeq ($(popcnt),yes)
ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) ifeq ($(arch),$(filter $(arch),ppc64 ppc64-altivec ppc64-vsx armv7 armv8 arm64))
CXXFLAGS += -DUSE_POPCNT CXXFLAGS += -DUSE_POPCNT
else else
CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT
@ -704,6 +738,20 @@ ifeq ($(mmx),yes)
endif endif
endif endif
ifeq ($(altivec),yes)
CXXFLAGS += -maltivec
ifeq ($(COMP),gcc)
CXXFLAGS += -mabi=altivec
endif
endif
ifeq ($(vsx),yes)
CXXFLAGS += -mvsx
ifeq ($(COMP),gcc)
CXXFLAGS += -DNO_WARN_X86_INTRINSICS -DUSE_SSE2
endif
endif
ifeq ($(neon),yes) ifeq ($(neon),yes)
CXXFLAGS += -DUSE_NEON=$(arm_version) CXXFLAGS += -DUSE_NEON=$(arm_version)
ifeq ($(KERNEL),Linux) ifeq ($(KERNEL),Linux)
@ -719,6 +767,18 @@ ifeq ($(dotprod),yes)
CXXFLAGS += -march=armv8.2-a+dotprod -DUSE_NEON_DOTPROD CXXFLAGS += -march=armv8.2-a+dotprod -DUSE_NEON_DOTPROD
endif endif
ifeq ($(lasx),yes)
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
CXXFLAGS += -mlasx
endif
endif
ifeq ($(lsx),yes)
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
CXXFLAGS += -mlsx
endif
endif
### 3.7 pext ### 3.7 pext
ifeq ($(pext),yes) ifeq ($(pext),yes)
CXXFLAGS += -DUSE_PEXT CXXFLAGS += -DUSE_PEXT
@ -791,71 +851,75 @@ endif
### ========================================================================== ### ==========================================================================
help: help:
@echo "" @echo "" && \
@echo "To compile stockfish, type: " echo "To compile stockfish, type: " && \
@echo "" echo "" && \
@echo "make -j target [ARCH=arch] [COMP=compiler] [COMPCXX=cxx]" echo "make -j target [ARCH=arch] [COMP=compiler] [COMPCXX=cxx]" && \
@echo "" echo "" && \
@echo "Supported targets:" echo "Supported targets:" && \
@echo "" echo "" && \
@echo "help > Display architecture details" echo "help > Display architecture details" && \
@echo "profile-build > standard build with profile-guided optimization" echo "profile-build > standard build with profile-guided optimization" && \
@echo "build > skip profile-guided optimization" echo "build > skip profile-guided optimization" && \
@echo "net > Download the default nnue nets" echo "net > Download the default nnue nets" && \
@echo "strip > Strip executable" echo "strip > Strip executable" && \
@echo "install > Install executable" echo "install > Install executable" && \
@echo "clean > Clean up" echo "clean > Clean up" && \
@echo "" echo "" && \
@echo "Supported archs:" echo "Supported archs:" && \
@echo "" echo "" && \
@echo "native > select the best architecture for the host processor (default)" echo "native > select the best architecture for the host processor (default)" && \
@echo "x86-64-vnni512 > x86 64-bit with vnni 512bit support" echo "x86-64-vnni512 > x86 64-bit with vnni 512bit support" && \
@echo "x86-64-vnni256 > x86 64-bit with vnni 512bit support, limit operands to 256bit wide" echo "x86-64-vnni256 > x86 64-bit with vnni 512bit support, limit operands to 256bit wide" && \
@echo "x86-64-avx512 > x86 64-bit with avx512 support" echo "x86-64-avx512 > x86 64-bit with avx512 support" && \
@echo "x86-64-avxvnni > x86 64-bit with vnni 256bit support" echo "x86-64-avxvnni > x86 64-bit with vnni 256bit support" && \
@echo "x86-64-bmi2 > x86 64-bit with bmi2 support" echo "x86-64-bmi2 > x86 64-bit with bmi2 support" && \
@echo "x86-64-avx2 > x86 64-bit with avx2 support" echo "x86-64-avx2 > x86 64-bit with avx2 support" && \
@echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" && \
@echo "x86-64-modern > deprecated, currently x86-64-sse41-popcnt" echo "x86-64-modern > deprecated, currently x86-64-sse41-popcnt" && \
@echo "x86-64-ssse3 > x86 64-bit with ssse3 support" echo "x86-64-ssse3 > x86 64-bit with ssse3 support" && \
@echo "x86-64-sse3-popcnt > x86 64-bit with sse3 compile and popcnt support" echo "x86-64-sse3-popcnt > x86 64-bit with sse3 compile and popcnt support" && \
@echo "x86-64 > x86 64-bit generic (with sse2 support)" echo "x86-64 > x86 64-bit generic (with sse2 support)" && \
@echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" && \
@echo "x86-32-sse2 > x86 32-bit with sse2 support" echo "x86-32-sse2 > x86 32-bit with sse2 support" && \
@echo "x86-32 > x86 32-bit generic (with mmx compile support)" echo "x86-32 > x86 32-bit generic (with mmx compile support)" && \
@echo "ppc-64 > PPC 64-bit" echo "ppc-64 > PPC 64-bit" && \
@echo "ppc-32 > PPC 32-bit" echo "ppc-64-altivec > PPC 64-bit with altivec support" && \
@echo "armv7 > ARMv7 32-bit" echo "ppc-64-vsx > PPC 64-bit with vsx support" && \
@echo "armv7-neon > ARMv7 32-bit with popcnt and neon" echo "ppc-32 > PPC 32-bit" && \
@echo "armv8 > ARMv8 64-bit with popcnt and neon" echo "armv7 > ARMv7 32-bit" && \
@echo "armv8-dotprod > ARMv8 64-bit with popcnt, neon and dot product support" echo "armv7-neon > ARMv7 32-bit with popcnt and neon" && \
@echo "e2k > Elbrus 2000" echo "armv8 > ARMv8 64-bit with popcnt and neon" && \
@echo "apple-silicon > Apple silicon ARM64" echo "armv8-dotprod > ARMv8 64-bit with popcnt, neon and dot product support" && \
@echo "general-64 > unspecified 64-bit" echo "e2k > Elbrus 2000" && \
@echo "general-32 > unspecified 32-bit" echo "apple-silicon > Apple silicon ARM64" && \
@echo "riscv64 > RISC-V 64-bit" echo "general-64 > unspecified 64-bit" && \
@echo "loongarch64 > LoongArch 64-bit" echo "general-32 > unspecified 32-bit" && \
@echo "" echo "riscv64 > RISC-V 64-bit" && \
@echo "Supported compilers:" echo "loongarch64 > LoongArch 64-bit" && \
@echo "" echo "loongarch64-lsx > LoongArch 64-bit with SIMD eXtension" && \
@echo "gcc > GNU compiler (default)" echo "loongarch64-lasx > LoongArch 64-bit with Advanced SIMD eXtension" && \
@echo "mingw > GNU compiler with MinGW under Windows" echo "" && \
@echo "clang > LLVM Clang compiler" echo "Supported compilers:" && \
@echo "icx > Intel oneAPI DPC++/C++ Compiler" echo "" && \
@echo "ndk > Google NDK to cross-compile for Android" echo "gcc > GNU compiler (default)" && \
@echo "" echo "mingw > GNU compiler with MinGW under Windows" && \
@echo "Simple examples. If you don't know what to do, you likely want to run one of: " echo "clang > LLVM Clang compiler" && \
@echo "" echo "icx > Intel oneAPI DPC++/C++ Compiler" && \
@echo "make -j profile-build ARCH=x86-64-avx2 # typically a fast compile for common systems " echo "ndk > Google NDK to cross-compile for Android" && \
@echo "make -j profile-build ARCH=x86-64-sse41-popcnt # A more portable compile for 64-bit systems " echo "" && \
@echo "make -j profile-build ARCH=x86-64 # A portable compile for 64-bit systems " echo "Simple examples. If you don't know what to do, you likely want to run one of: " && \
@echo "" echo "" && \
@echo "Advanced examples, for experienced users: " echo "make -j profile-build ARCH=x86-64-avx2 # typically a fast compile for common systems " && \
@echo "" echo "make -j profile-build ARCH=x86-64-sse41-popcnt # A more portable compile for 64-bit systems " && \
@echo "make -j profile-build ARCH=x86-64-avxvnni" echo "make -j profile-build ARCH=x86-64 # A portable compile for 64-bit systems " && \
@echo "make -j profile-build ARCH=x86-64-avxvnni COMP=gcc COMPCXX=g++-12.0" echo "" && \
@echo "make -j build ARCH=x86-64-ssse3 COMP=clang" echo "Advanced examples, for experienced users: " && \
@echo "" echo "" && \
echo "make -j profile-build ARCH=x86-64-avxvnni" && \
echo "make -j profile-build ARCH=x86-64-avxvnni COMP=gcc COMPCXX=g++-12.0" && \
echo "make -j build ARCH=x86-64-ssse3 COMP=clang" && \
echo ""
ifneq ($(SUPPORTED_ARCH), true) ifneq ($(SUPPORTED_ARCH), true)
@echo "Specify a supported architecture with the ARCH option for more details" @echo "Specify a supported architecture with the ARCH option for more details"
@echo "" @echo ""
@ -917,59 +981,9 @@ profileclean:
@rm -f stockfish.res @rm -f stockfish.res
@rm -f ./-lstdc++.res @rm -f ./-lstdc++.res
define fetch_network
@echo "Default net: $(nnuenet)"
@if [ "x$(curl_or_wget)" = "x" ]; then \
echo "Neither curl nor wget is installed. Install one of these tools unless the net has been downloaded manually"; \
fi
@if [ "x$(shasum_command)" = "x" ]; then \
echo "shasum / sha256sum not found, skipping net validation"; \
elif test -f "$(nnuenet)"; then \
if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
echo "Removing invalid network"; rm -f $(nnuenet); \
fi; \
fi;
@for nnuedownloadurl in "$(nnuedownloadurl1)" "$(nnuedownloadurl2)"; do \
if test -f "$(nnuenet)"; then \
echo "$(nnuenet) available : OK"; break; \
else \
if [ "x$(curl_or_wget)" != "x" ]; then \
echo "Downloading $${nnuedownloadurl}"; $(curl_or_wget) $${nnuedownloadurl} > $(nnuenet);\
else \
echo "No net found and download not possible"; exit 1;\
fi; \
fi; \
if [ "x$(shasum_command)" != "x" ]; then \
if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
echo "Removing failed download"; rm -f $(nnuenet); \
fi; \
fi; \
done
@if ! test -f "$(nnuenet)"; then \
echo "Failed to download $(nnuenet)."; \
fi;
@if [ "x$(shasum_command)" != "x" ]; then \
if [ "$(nnuenet)" = "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
echo "Network validated"; break; \
fi; \
fi;
endef
# set up shell variables for the net stuff
define netvariables
$(eval nnuenet := $(shell grep $(1) evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
$(eval nnuedownloadurl1 := https://tests.stockfishchess.org/api/nn/$(nnuenet))
$(eval nnuedownloadurl2 := https://github.com/official-stockfish/networks/raw/master/$(nnuenet))
$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
$(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi))
endef
# evaluation network (nnue) # evaluation network (nnue)
net: net:
$(call netvariables, EvalFileDefaultNameBig) @$(SHELL) ../scripts/net.sh
$(call fetch_network)
$(call netvariables, EvalFileDefaultNameSmall)
$(call fetch_network)
format: format:
$(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file $(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file
@ -986,61 +1000,71 @@ all: $(EXE) .depend
config-sanity: net config-sanity: net
@echo "" @echo ""
@echo "Config:" @echo "Config:" && \
@echo "debug: '$(debug)'" echo "debug: '$(debug)'" && \
@echo "sanitize: '$(sanitize)'" echo "sanitize: '$(sanitize)'" && \
@echo "optimize: '$(optimize)'" echo "optimize: '$(optimize)'" && \
@echo "arch: '$(arch)'" echo "arch: '$(arch)'" && \
@echo "bits: '$(bits)'" echo "bits: '$(bits)'" && \
@echo "kernel: '$(KERNEL)'" echo "kernel: '$(KERNEL)'" && \
@echo "os: '$(OS)'" echo "os: '$(OS)'" && \
@echo "prefetch: '$(prefetch)'" echo "prefetch: '$(prefetch)'" && \
@echo "popcnt: '$(popcnt)'" echo "popcnt: '$(popcnt)'" && \
@echo "pext: '$(pext)'" echo "pext: '$(pext)'" && \
@echo "sse: '$(sse)'" echo "sse: '$(sse)'" && \
@echo "mmx: '$(mmx)'" echo "mmx: '$(mmx)'" && \
@echo "sse2: '$(sse2)'" echo "sse2: '$(sse2)'" && \
@echo "ssse3: '$(ssse3)'" echo "ssse3: '$(ssse3)'" && \
@echo "sse41: '$(sse41)'" echo "sse41: '$(sse41)'" && \
@echo "avx2: '$(avx2)'" echo "avx2: '$(avx2)'" && \
@echo "avxvnni: '$(avxvnni)'" echo "avxvnni: '$(avxvnni)'" && \
@echo "avx512: '$(avx512)'" echo "avx512: '$(avx512)'" && \
@echo "vnni256: '$(vnni256)'" echo "vnni256: '$(vnni256)'" && \
@echo "vnni512: '$(vnni512)'" echo "vnni512: '$(vnni512)'" && \
@echo "neon: '$(neon)'" echo "altivec: '$(altivec)'" && \
@echo "dotprod: '$(dotprod)'" echo "vsx: '$(vsx)'" && \
@echo "arm_version: '$(arm_version)'" echo "neon: '$(neon)'" && \
@echo "target_windows: '$(target_windows)'" echo "dotprod: '$(dotprod)'" && \
@echo "" echo "arm_version: '$(arm_version)'" && \
@echo "Flags:" echo "lsx: '$(lsx)'" && \
@echo "CXX: $(CXX)" echo "lasx: '$(lasx)'" && \
@echo "CXXFLAGS: $(CXXFLAGS)" echo "target_windows: '$(target_windows)'" && \
@echo "LDFLAGS: $(LDFLAGS)" echo "" && \
@echo "" echo "Flags:" && \
@echo "Testing config sanity. If this fails, try 'make help' ..." echo "CXX: $(CXX)" && \
@echo "" echo "CXXFLAGS: $(CXXFLAGS)" && \
@test "$(debug)" = "yes" || test "$(debug)" = "no" echo "LDFLAGS: $(LDFLAGS)" && \
@test "$(optimize)" = "yes" || test "$(optimize)" = "no" echo "" && \
@test "$(SUPPORTED_ARCH)" = "true" echo "Testing config sanity. If this fails, try 'make help' ..." && \
@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ echo "" && \
(test "$(debug)" = "yes" || test "$(debug)" = "no") && \
(test "$(optimize)" = "yes" || test "$(optimize)" = "no") && \
(test "$(SUPPORTED_ARCH)" = "true") && \
(test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \
test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" || test "$(arch)" = "riscv64" || test "$(arch)" = "loongarch64" test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" || \
@test "$(bits)" = "32" || test "$(bits)" = "64" test "$(arch)" = "riscv64" || test "$(arch)" = "loongarch64") && \
@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" (test "$(bits)" = "32" || test "$(bits)" = "64") && \
@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" (test "$(prefetch)" = "yes" || test "$(prefetch)" = "no") && \
@test "$(pext)" = "yes" || test "$(pext)" = "no" (test "$(popcnt)" = "yes" || test "$(popcnt)" = "no") && \
@test "$(sse)" = "yes" || test "$(sse)" = "no" (test "$(pext)" = "yes" || test "$(pext)" = "no") && \
@test "$(mmx)" = "yes" || test "$(mmx)" = "no" (test "$(sse)" = "yes" || test "$(sse)" = "no") && \
@test "$(sse2)" = "yes" || test "$(sse2)" = "no" (test "$(mmx)" = "yes" || test "$(mmx)" = "no") && \
@test "$(ssse3)" = "yes" || test "$(ssse3)" = "no" (test "$(sse2)" = "yes" || test "$(sse2)" = "no") && \
@test "$(sse41)" = "yes" || test "$(sse41)" = "no" (test "$(ssse3)" = "yes" || test "$(ssse3)" = "no") && \
@test "$(avx2)" = "yes" || test "$(avx2)" = "no" (test "$(sse41)" = "yes" || test "$(sse41)" = "no") && \
@test "$(avx512)" = "yes" || test "$(avx512)" = "no" (test "$(avx2)" = "yes" || test "$(avx2)" = "no") && \
@test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" (test "$(avx512)" = "yes" || test "$(avx512)" = "no") && \
@test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" (test "$(vnni256)" = "yes" || test "$(vnni256)" = "no") && \
@test "$(neon)" = "yes" || test "$(neon)" = "no" (test "$(vnni512)" = "yes" || test "$(vnni512)" = "no") && \
@test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ (test "$(altivec)" = "yes" || test "$(altivec)" = "no") && \
|| test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" (test "$(vsx)" = "yes" || test "$(vsx)" = "no") && \
(test "$(neon)" = "yes" || test "$(neon)" = "no") && \
(test "$(lsx)" = "yes" || test "$(lsx)" = "no") && \
(test "$(lasx)" = "yes" || test "$(lasx)" = "no") && \
(test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || \
test "$(comp)" = "clang" || test "$(comp)" = "armv7a-linux-androideabi16-clang" || \
test "$(comp)" = "aarch64-linux-android21-clang")
$(EXE): $(OBJS) $(EXE): $(OBJS)
+$(CXX) -o $@ $(OBJS) $(LDFLAGS) +$(CXX) -o $@ $(OBJS) $(LDFLAGS)
@@ -1051,14 +1075,14 @@ FORCE:
clang-profile-make: clang-profile-make:
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
EXTRACXXFLAGS='-fprofile-instr-generate ' \ EXTRACXXFLAGS='-fprofile-generate ' \
EXTRALDFLAGS=' -fprofile-instr-generate' \ EXTRALDFLAGS=' -fprofile-generate' \
all all
clang-profile-use: clang-profile-use:
$(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ EXTRACXXFLAGS='-fprofile-use=stockfish.profdata' \
EXTRALDFLAGS='-fprofile-use ' \ EXTRALDFLAGS='-fprofile-use ' \
all all
@@ -17,6 +17,7 @@
*/ */
#include "benchmark.h" #include "benchmark.h"
#include "numa.h"
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
@@ -91,6 +92,282 @@ const std::vector<std::string> Defaults = {
}; };
// clang-format on // clang-format on
// clang-format off
// 5 games with <60 moves, picked at random by a human, from
// https://tests.stockfishchess.org/tests/view/665c71f9fd45fb0f907c21e0
// only moves for one side
const std::vector<std::vector<std::string>> BenchmarkPositions = {
{
"rnbq1k1r/ppp1bppp/4pn2/8/2B5/2NP1N2/PPP2PPP/R1BQR1K1 b - - 2 8",
"rnbq1k1r/pp2bppp/4pn2/2p5/2B2B2/2NP1N2/PPP2PPP/R2QR1K1 b - - 1 9",
"r1bq1k1r/pp2bppp/2n1pn2/2p5/2B1NB2/3P1N2/PPP2PPP/R2QR1K1 b - - 3 10",
"r1bq1k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/R2QR1K1 b - - 0 11",
"r1b2k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/3RR1K1 b - - 0 12",
"r1b1k2r/pp2bppp/2n1p3/2p5/2B1PB2/2P2N2/PP3PPP/3RR1K1 b - - 0 13",
"r1b1k2r/1p2bppp/p1n1p3/2p5/4PB2/2P2N2/PP2BPPP/3RR1K1 b - - 1 14",
"r1b1k2r/4bppp/p1n1p3/1pp5/P3PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 15",
"r1b1k2r/4bppp/p1n1p3/1P6/2p1PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 16",
"r1b1k2r/4bppp/2n1p3/1p6/2p1PB2/1PP2N2/4BPPP/3RR1K1 b - - 0 17",
"r3k2r/3bbppp/2n1p3/1p6/2P1PB2/2P2N2/4BPPP/3RR1K1 b - - 0 18",
"r3k2r/3bbppp/2n1p3/8/1pP1P3/2P2N2/3BBPPP/3RR1K1 b - - 1 19",
"1r2k2r/3bbppp/2n1p3/8/1pPNP3/2P5/3BBPPP/3RR1K1 b - - 3 20",
"1r2k2r/3bbppp/2n1p3/8/2PNP3/2B5/4BPPP/3RR1K1 b - - 0 21",
"1r2k2r/3bb1pp/2n1pp2/1N6/2P1P3/2B5/4BPPP/3RR1K1 b - - 1 22",
"1r2k2r/3b2pp/2n1pp2/1N6/1BP1P3/8/4BPPP/3RR1K1 b - - 0 23",
"1r2k2r/3b2pp/4pp2/1N6/1nP1P3/8/3RBPPP/4R1K1 b - - 1 24",
"1r5r/3bk1pp/4pp2/1N6/1nP1PP2/8/3RB1PP/4R1K1 b - - 0 25",
"1r5r/3bk1pp/2n1pp2/1N6/2P1PP2/8/3RBKPP/4R3 b - - 2 26",
"1r5r/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/4R3 b - - 0 27",
"1r1r4/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/R7 b - - 2 28",
"1r1r4/N3k1pp/2n1bp2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 4 29",
"1r1r4/3bk1pp/2N2p2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 0 30",
"1r1R4/4k1pp/2b2p2/4p3/2P1PP2/6P1/4BK1P/R7 b - - 0 31",
"3r4/4k1pp/2b2p2/4P3/2P1P3/6P1/4BK1P/R7 b - - 0 32",
"3r4/R3k1pp/2b5/4p3/2P1P3/6P1/4BK1P/8 b - - 1 33",
"8/3rk1pp/2b5/R3p3/2P1P3/6P1/4BK1P/8 b - - 3 34",
"8/3r2pp/2bk4/R1P1p3/4P3/6P1/4BK1P/8 b - - 0 35",
"8/2kr2pp/2b5/R1P1p3/4P3/4K1P1/4B2P/8 b - - 2 36",
"1k6/3r2pp/2b5/RBP1p3/4P3/4K1P1/7P/8 b - - 4 37",
"8/1k1r2pp/2b5/R1P1p3/4P3/3BK1P1/7P/8 b - - 6 38",
"1k6/3r2pp/2b5/2P1p3/4P3/3BK1P1/7P/R7 b - - 8 39",
"1k6/r5pp/2b5/2P1p3/4P3/3BK1P1/7P/5R2 b - - 10 40",
"1k3R2/6pp/2b5/2P1p3/4P3/r2BK1P1/7P/8 b - - 12 41",
"5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 14 42",
"5R2/2k3pp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 16 43",
"5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 18 44",
"5R2/2k3pp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 20 45",
"8/2k2Rpp/2b5/2P1p3/4P3/r2B1KP1/7P/8 b - - 22 46",
"3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 24 47",
"3k4/5Rpp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 26 48",
"3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 28 49",
"3k4/5Rpp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 30 50",
"3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 32 51",
"3k4/5Rpp/2b5/2P1p3/4P3/2KB2P1/r6P/8 b - - 34 52",
"3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/2K4P/8 b - - 36 53",
"3k4/5Rpp/2b5/2P1p3/4P3/1K1B2P1/r6P/8 b - - 38 54",
"3k4/6Rp/2b5/2P1p3/4P3/1K1B2P1/7r/8 b - - 0 55",
"3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 1 56",
"8/2k3R1/2b4p/2P1p3/4P3/1K1B2P1/7r/8 b - - 3 57",
"3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 5 58",
"8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/7r/8 b - - 7 59",
"8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 9 60",
"8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/6r1/8 b - - 11 61",
"8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 13 62",
"8/2k5/2b3Rp/2P1p3/2K1P3/3B2P1/6r1/8 b - - 15 63",
"4b3/2k3R1/7p/2P1p3/2K1P3/3B2P1/6r1/8 b - - 17 64",
},
{
"r1bqkbnr/npp1pppp/p7/3P4/4pB2/2N5/PPP2PPP/R2QKBNR w KQkq - 1 6",
"r1bqkb1r/npp1pppp/p4n2/3P4/4pB2/2N5/PPP1QPPP/R3KBNR w KQkq - 3 7",
"r2qkb1r/npp1pppp/p4n2/3P1b2/4pB2/2N5/PPP1QPPP/2KR1BNR w kq - 5 8",
"r2qkb1r/1pp1pppp/p4n2/1n1P1b2/4pB2/2N4P/PPP1QPP1/2KR1BNR w kq - 1 9",
"r2qkb1r/1pp1pppp/5n2/1p1P1b2/4pB2/7P/PPP1QPP1/2KR1BNR w kq - 0 10",
"r2qkb1r/1ppbpppp/5n2/1Q1P4/4pB2/7P/PPP2PP1/2KR1BNR w kq - 1 11",
"3qkb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/2KR1BNR w k - 0 12",
"q3kb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/1K1R1BNR w k - 2 13",
"r3kb1r/2pbpppp/5n2/3P4/4pB2/7P/1PP2PP1/1K1R1BNR w k - 0 14",
"r3kb1r/2Bb1ppp/4pn2/3P4/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 15",
"r3kb1r/2Bb2pp/4pn2/8/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 16",
"r3k2r/2Bb2pp/4pn2/2b5/4p3/7P/1PP1NPP1/1K1R1B1R w k - 2 17",
"r6r/2Bbk1pp/4pn2/2b5/3Np3/7P/1PP2PP1/1K1R1B1R w - - 4 18",
"r6r/b2bk1pp/4pn2/4B3/3Np3/7P/1PP2PP1/1K1R1B1R w - - 6 19",
"r1r5/b2bk1pp/4pn2/4B3/2BNp3/7P/1PP2PP1/1K1R3R w - - 8 20",
"r7/b2bk1pp/4pn2/2r1B3/2BNp3/1P5P/2P2PP1/1K1R3R w - - 1 21",
"rb6/3bk1pp/4pn2/2r1B3/2BNpP2/1P5P/2P3P1/1K1R3R w - - 1 22",
"1r6/3bk1pp/4pn2/2r5/2BNpP2/1P5P/2P3P1/1K1R3R w - - 0 23",
"1r6/3bk1p1/4pn1p/2r5/2BNpP2/1P5P/2P3P1/2KR3R w - - 0 24",
"8/3bk1p1/1r2pn1p/2r5/2BNpP1P/1P6/2P3P1/2KR3R w - - 1 25",
"8/3bk3/1r2pnpp/2r5/2BNpP1P/1P6/2P3P1/2K1R2R w - - 0 26",
"2b5/4k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R2R w - - 1 27",
"8/1b2k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R1R1 w - - 3 28",
"8/1b1nk3/1r2p1pp/2r5/2BNpPPP/1P6/2P5/2K1R1R1 w - - 1 29",
"8/1b2k3/1r2p1pp/2r1nP2/2BNp1PP/1P6/2P5/2K1R1R1 w - - 1 30",
"8/1b2k3/1r2p1p1/2r1nPp1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 31",
"8/1b2k3/1r2p1n1/2r3p1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 32",
"8/1b2k3/1r2p1n1/6r1/2BNp2P/1P6/2P5/2K1R3 w - - 0 33",
"8/1b2k3/1r2p3/4n1P1/2BNp3/1P6/2P5/2K1R3 w - - 1 34",
"8/1b2k3/1r2p3/4n1P1/2BN4/1P2p3/2P5/2K4R w - - 0 35",
"8/1b2k3/1r2p2R/6P1/2nN4/1P2p3/2P5/2K5 w - - 0 36",
"8/1b2k3/3rp2R/6P1/2PN4/4p3/2P5/2K5 w - - 1 37",
"8/4k3/3rp2R/6P1/2PN4/2P1p3/6b1/2K5 w - - 1 38",
"8/4k3/r3p2R/2P3P1/3N4/2P1p3/6b1/2K5 w - - 1 39",
"8/3k4/r3p2R/2P2NP1/8/2P1p3/6b1/2K5 w - - 3 40",
"8/3k4/4p2R/2P3P1/8/2P1N3/6b1/r1K5 w - - 1 41",
"8/3k4/4p2R/2P3P1/8/2P1N3/3K2b1/6r1 w - - 3 42",
"8/3k4/4p2R/2P3P1/8/2PKNb2/8/6r1 w - - 5 43",
"8/4k3/4p1R1/2P3P1/8/2PKNb2/8/6r1 w - - 7 44",
"8/4k3/4p1R1/2P3P1/3K4/2P1N3/8/6rb w - - 9 45",
"8/3k4/4p1R1/2P1K1P1/8/2P1N3/8/6rb w - - 11 46",
"8/3k4/4p1R1/2P3P1/5K2/2P1N3/8/4r2b w - - 13 47",
"8/3k4/2b1p2R/2P3P1/5K2/2P1N3/8/4r3 w - - 15 48",
"8/3k4/2b1p3/2P3P1/5K2/2P1N2R/8/6r1 w - - 17 49",
"2k5/7R/2b1p3/2P3P1/5K2/2P1N3/8/6r1 w - - 19 50",
"2k5/7R/4p3/2P3P1/b1P2K2/4N3/8/6r1 w - - 1 51",
"2k5/3bR3/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 3 52",
"3k4/3b2R1/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 5 53",
"3kb3/6R1/4p1P1/2P5/2P2K2/4N3/8/6r1 w - - 1 54",
"3kb3/6R1/4p1P1/2P5/2P2KN1/8/8/2r5 w - - 3 55",
"3kb3/6R1/4p1P1/2P1N3/2P2K2/8/8/5r2 w - - 5 56",
"3kb3/6R1/4p1P1/2P1N3/2P5/4K3/8/4r3 w - - 7 57",
},
{
"rnbq1rk1/ppp1npb1/4p1p1/3P3p/3PP3/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 8",
"rnbq1rk1/ppp1npb1/6p1/3pP2p/3P4/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 9",
"rn1q1rk1/ppp1npb1/6p1/3pP2p/3P2b1/2N2N2/PP2BPPP/R1BQR1K1 b - - 2 10",
"r2q1rk1/ppp1npb1/2n3p1/3pP2p/3P2bN/2N5/PP2BPPP/R1BQR1K1 b - - 4 11",
"r4rk1/pppqnpb1/2n3p1/3pP2p/3P2bN/2N4P/PP2BPP1/R1BQR1K1 b - - 0 12",
"r4rk1/pppqnpb1/2n3p1/3pP2p/3P3N/7P/PP2NPP1/R1BQR1K1 b - - 0 13",
"r4rk1/pppq1pb1/2n3p1/3pPN1p/3P4/7P/PP2NPP1/R1BQR1K1 b - - 0 14",
"r4rk1/ppp2pb1/2n3p1/3pPq1p/3P1N2/7P/PP3PP1/R1BQR1K1 b - - 1 15",
"r4rk1/pppq1pb1/2n3p1/3pP2p/P2P1N2/7P/1P3PP1/R1BQR1K1 b - - 0 16",
"r2n1rk1/pppq1pb1/6p1/3pP2p/P2P1N2/R6P/1P3PP1/2BQR1K1 b - - 2 17",
"r4rk1/pppq1pb1/4N1p1/3pP2p/P2P4/R6P/1P3PP1/2BQR1K1 b - - 0 18",
"r4rk1/ppp2pb1/4q1p1/3pP1Bp/P2P4/R6P/1P3PP1/3QR1K1 b - - 1 19",
"r3r1k1/ppp2pb1/4q1p1/3pP1Bp/P2P1P2/R6P/1P4P1/3QR1K1 b - - 0 20",
"r3r1k1/ppp3b1/4qpp1/3pP2p/P2P1P1B/R6P/1P4P1/3QR1K1 b - - 1 21",
"r3r1k1/ppp3b1/4q1p1/3pP2p/P4P1B/R6P/1P4P1/3QR1K1 b - - 0 22",
"r4rk1/ppp3b1/4q1p1/3pP1Bp/P4P2/R6P/1P4P1/3QR1K1 b - - 2 23",
"r4rk1/pp4b1/4q1p1/2ppP1Bp/P4P2/3R3P/1P4P1/3QR1K1 b - - 1 24",
"r4rk1/pp4b1/4q1p1/2p1P1Bp/P2p1PP1/3R3P/1P6/3QR1K1 b - - 0 25",
"r4rk1/pp4b1/4q1p1/2p1P1B1/P2p1PP1/3R4/1P6/3QR1K1 b - - 0 26",
"r5k1/pp3rb1/4q1p1/2p1P1B1/P2p1PP1/6R1/1P6/3QR1K1 b - - 2 27",
"5rk1/pp3rb1/4q1p1/2p1P1B1/P2pRPP1/6R1/1P6/3Q2K1 b - - 4 28",
"5rk1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/6R1/1P6/3Q2K1 b - - 0 29",
"4r1k1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 30",
"4r1k1/5rb1/pP2q1p1/2p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 31",
"4r1k1/5rb1/pq4p1/2p1P1B1/3pRPP1/1P4R1/4Q3/6K1 b - - 1 32",
"4r1k1/1r4b1/pq4p1/2p1P1B1/3pRPP1/1P4R1/2Q5/6K1 b - - 3 33",
"4r1k1/1r4b1/1q4p1/p1p1P1B1/3p1PP1/1P4R1/2Q5/4R1K1 b - - 1 34",
"4r1k1/3r2b1/1q4p1/p1p1P1B1/2Qp1PP1/1P4R1/8/4R1K1 b - - 3 35",
"4r1k1/3r2b1/4q1p1/p1p1P1B1/2Qp1PP1/1P4R1/5K2/4R3 b - - 5 36",
"4r1k1/3r2b1/6p1/p1p1P1B1/2Pp1PP1/6R1/5K2/4R3 b - - 0 37",
"4r1k1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/5K2/3R4 b - - 1 38",
"5rk1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/8/3RK3 b - - 3 39",
"5rk1/6b1/6p1/p1p1P1B1/2Pr1PP1/3R4/8/3RK3 b - - 0 40",
"5rk1/3R2b1/6p1/p1p1P1B1/2r2PP1/8/8/3RK3 b - - 1 41",
"5rk1/3R2b1/6p1/p1p1P1B1/4rPP1/8/3K4/3R4 b - - 3 42",
"1r4k1/3R2b1/6p1/p1p1P1B1/4rPP1/2K5/8/3R4 b - - 5 43",
"1r4k1/3R2b1/6p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 7 44",
"1r3bk1/8/3R2p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 9 45",
"1r3bk1/8/6R1/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 0 46",
"1r3b2/5k2/R7/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 2 47",
"5b2/1r3k2/R7/2p1P1B1/p1K2PP1/4r3/8/7R b - - 4 48",
"5b2/5k2/R7/2pKP1B1/pr3PP1/4r3/8/7R b - - 6 49",
"5b2/5k2/R1K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 8 50",
"8/R4kb1/2K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 10 51",
"8/R5b1/2K3k1/2p1PPB1/p2r2P1/4r3/8/7R b - - 0 52",
"8/6R1/2K5/2p1PPk1/p2r2P1/4r3/8/7R b - - 0 53",
"8/6R1/2K5/2p1PP2/p2r1kP1/4r3/8/5R2 b - - 2 54",
"8/6R1/2K2P2/2p1P3/p2r2P1/4r1k1/8/5R2 b - - 0 55",
"8/5PR1/2K5/2p1P3/p2r2P1/4r3/6k1/5R2 b - - 0 56",
},
{
"rn1qkb1r/p1pbpppp/5n2/8/2pP4/2N5/1PQ1PPPP/R1B1KBNR w KQkq - 0 7",
"r2qkb1r/p1pbpppp/2n2n2/8/2pP4/2N2N2/1PQ1PPPP/R1B1KB1R w KQkq - 2 8",
"r2qkb1r/p1pbpppp/5n2/8/1npPP3/2N2N2/1PQ2PPP/R1B1KB1R w KQkq - 1 9",
"r2qkb1r/p1pb1ppp/4pn2/8/1npPP3/2N2N2/1P3PPP/R1BQKB1R w KQkq - 0 10",
"r2qk2r/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQK2R w KQkq - 1 11",
"r2q1rk1/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQ1RK1 w - - 3 12",
"r2q1rk1/2pbbppp/p3pn2/8/1nBPPB2/2N2N2/1P3PPP/R2Q1RK1 w - - 0 13",
"r2q1rk1/2p1bppp/p3pn2/1b6/1nBPPB2/2N2N2/1P3PPP/R2QR1K1 w - - 2 14",
"r2q1rk1/4bppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/5PPP/R2QR1K1 w - - 0 15",
"r4rk1/3qbppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/3Q1PPP/R3R1K1 w - - 2 16",
"r4rk1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/3Q1PP1/R3R1K1 w - - 1 17",
"r3r1k1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/4QPP1/R3R1K1 w - - 3 18",
"r3r1k1/1q1nbppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/4QPP1/3RR1K1 w - - 5 19",
"r3rbk1/1q1n1ppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R1K1 w - - 7 20",
"r3rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R2K w - - 9 21",
"2r1rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/1R5K w - - 11 22",
"2r1rbk1/1q4pp/pnp1pp2/1b6/1nBPPB2/1PN2N1P/4QPP1/1R1R3K w - - 0 23",
"2r1rbk1/5qpp/pnp1pp2/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R3K w - - 2 24",
"2r1rbk1/5qp1/pnp1pp1p/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R2K1 w - - 0 25",
"2r1rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/n3QPP1/1R1R2K1 w - - 0 26",
"r3rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/1R1R2K1 w - - 1 27",
"rr3bk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/R2R2K1 w - - 3 28",
"rr2qbk1/6p1/pnp1pp1p/1b6/2BPP3/1P2BN1P/4QPP1/R2R2K1 w - - 5 29",
"rr2qbk1/6p1/1np1pp1p/pb6/2BPP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 30",
"rr2qbk1/6p1/1n2pp1p/pp6/3PP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 31",
"rr2qbk1/6p1/1n2pp1p/1p1P4/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 0 32",
"rr2qbk1/3n2p1/3Ppp1p/1p6/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 1 33",
"rr3bk1/3n2p1/3Ppp1p/1p5q/pP2P3/3QBN1P/5PP1/R2R2K1 w - - 1 34",
"rr3bk1/3n2p1/3Ppp1p/1p5q/1P2P3/p2QBN1P/5PP1/2RR2K1 w - - 0 35",
"1r3bk1/3n2p1/r2Ppp1p/1p5q/1P2P3/pQ2BN1P/5PP1/2RR2K1 w - - 2 36",
"1r2qbk1/2Rn2p1/r2Ppp1p/1p6/1P2P3/pQ2BN1P/5PP1/3R2K1 w - - 4 37",
"1r2qbk1/2Rn2p1/r2Ppp1p/1pB5/1P2P3/1Q3N1P/p4PP1/3R2K1 w - - 0 38",
"1r2q1k1/2Rn2p1/r2bpp1p/1pB5/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 39",
"1r2q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 40",
"2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 1 41",
"1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 3 42",
"2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 5 43",
"1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 7 44",
"1rq3k1/R2n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 9 45",
"2q3k1/Rr1n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 11 46",
"Rrq3k1/3n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 13 47",
},
{
"rn1qkb1r/1pp2ppp/p4p2/3p1b2/5P2/1P2PN2/P1PP2PP/RN1QKB1R b KQkq - 1 6",
"r2qkb1r/1pp2ppp/p1n2p2/3p1b2/3P1P2/1P2PN2/P1P3PP/RN1QKB1R b KQkq - 0 7",
"r2qkb1r/1pp2ppp/p4p2/3p1b2/1n1P1P2/1P1BPN2/P1P3PP/RN1QK2R b KQkq - 2 8",
"r2qkb1r/1pp2ppp/p4p2/3p1b2/3P1P2/1P1PPN2/P5PP/RN1QK2R b KQkq - 0 9",
"r2qk2r/1pp2ppp/p2b1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2QK2R b KQkq - 2 10",
"r2qk2r/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2Q1RK1 b kq - 1 11",
"r2q1rk1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P2Q2PP/R4RK1 b - - 3 12",
"r2qr1k1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1P1PPN2/P2QN1PP/R4RK1 b - - 5 13",
"r3r1k1/1p3ppp/pqpb1p2/3p1b2/3P1P2/1P1PPNN1/P2Q2PP/R4RK1 b - - 7 14",
"r3r1k1/1p3ppp/pqp2p2/3p1b2/1b1P1P2/1P1PPNN1/P1Q3PP/R4RK1 b - - 9 15",
"r3r1k1/1p1b1ppp/pqp2p2/3p4/1b1P1P2/1P1PPNN1/P4QPP/R4RK1 b - - 11 16",
"2r1r1k1/1p1b1ppp/pqp2p2/3p4/1b1PPP2/1P1P1NN1/P4QPP/R4RK1 b - - 0 17",
"2r1r1k1/1p1b1ppp/pq3p2/2pp4/1b1PPP2/PP1P1NN1/5QPP/R4RK1 b - - 0 18",
"2r1r1k1/1p1b1ppp/pq3p2/2Pp4/4PP2/PPbP1NN1/5QPP/R4RK1 b - - 0 19",
"2r1r1k1/1p1b1ppp/p4p2/2Pp4/4PP2/PqbP1NN1/5QPP/RR4K1 b - - 1 20",
"2r1r1k1/1p1b1ppp/p4p2/2Pp4/q3PP2/P1bP1NN1/R4QPP/1R4K1 b - - 3 21",
"2r1r1k1/1p3ppp/p4p2/1bPP4/q4P2/P1bP1NN1/R4QPP/1R4K1 b - - 0 22",
"2r1r1k1/1p3ppp/p4p2/2PP4/q4P2/P1bb1NN1/R4QPP/2R3K1 b - - 1 23",
"2r1r1k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R3K1 b - - 0 24",
"2rr2k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R4K b - - 2 25",
"2rr2k1/1p3ppp/p2P1p2/2Q5/5P2/P1bb1NN1/R5PP/2R4K b - - 0 26",
"3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1bb1N2/R3N1PP/2R4K b - - 1 27",
"3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1b2N2/4R1PP/2R4K b - - 0 28",
"3r2k1/1p3ppp/p2P1p2/2r5/1b3P2/P4N2/4R1PP/3R3K b - - 2 29",
"3r2k1/1p2Rppp/p2P1p2/b1r5/5P2/P4N2/6PP/3R3K b - - 4 30",
"3r2k1/1R3ppp/p1rP1p2/b7/5P2/P4N2/6PP/3R3K b - - 0 31",
"3r2k1/1R3ppp/p2R1p2/b7/5P2/P4N2/6PP/7K b - - 0 32",
"6k1/1R3ppp/p2r1p2/b7/5P2/P4NP1/7P/7K b - - 0 33",
"6k1/1R3p1p/p2r1pp1/b7/5P1P/P4NP1/8/7K b - - 0 34",
"6k1/3R1p1p/pr3pp1/b7/5P1P/P4NP1/8/7K b - - 2 35",
"6k1/5p2/pr3pp1/b2R3p/5P1P/P4NP1/8/7K b - - 1 36",
"6k1/5p2/pr3pp1/7p/5P1P/P1bR1NP1/8/7K b - - 3 37",
"6k1/5p2/p1r2pp1/7p/5P1P/P1bR1NP1/6K1/8 b - - 5 38",
"6k1/5p2/p1r2pp1/b2R3p/5P1P/P4NP1/6K1/8 b - - 7 39",
"6k1/5p2/p4pp1/b2R3p/5P1P/P4NPK/2r5/8 b - - 9 40",
"6k1/2b2p2/p4pp1/7p/5P1P/P2R1NPK/2r5/8 b - - 11 41",
"6k1/2b2p2/5pp1/p6p/3N1P1P/P2R2PK/2r5/8 b - - 1 42",
"6k1/2b2p2/5pp1/p6p/3N1P1P/P1R3PK/r7/8 b - - 3 43",
"6k1/5p2/1b3pp1/p6p/5P1P/P1R3PK/r1N5/8 b - - 5 44",
"8/5pk1/1bR2pp1/p6p/5P1P/P5PK/r1N5/8 b - - 7 45",
"3b4/5pk1/2R2pp1/p4P1p/7P/P5PK/r1N5/8 b - - 0 46",
"8/4bpk1/2R2pp1/p4P1p/6PP/P6K/r1N5/8 b - - 0 47",
"8/5pk1/2R2pP1/p6p/6PP/b6K/r1N5/8 b - - 0 48",
"8/6k1/2R2pp1/p6P/7P/b6K/r1N5/8 b - - 0 49",
"8/6k1/2R2p2/p6p/7P/b5K1/r1N5/8 b - - 1 50",
"8/8/2R2pk1/p6p/7P/b4K2/r1N5/8 b - - 3 51",
"8/8/2R2pk1/p6p/7P/4NK2/rb6/8 b - - 5 52",
"2R5/8/5pk1/7p/p6P/4NK2/rb6/8 b - - 1 53",
"6R1/8/5pk1/7p/p6P/4NK2/1b6/r7 b - - 3 54",
"R7/5k2/5p2/7p/p6P/4NK2/1b6/r7 b - - 5 55",
"R7/5k2/5p2/7p/7P/p3N3/1b2K3/r7 b - - 1 56",
"8/R4k2/5p2/7p/7P/p3N3/1b2K3/7r b - - 3 57",
"8/8/5pk1/7p/R6P/p3N3/1b2K3/7r b - - 5 58",
"8/8/5pk1/7p/R6P/p7/4K3/2bN3r b - - 7 59",
"8/8/5pk1/7p/R6P/p7/4KN1r/2b5 b - - 9 60",
"8/8/5pk1/7p/R6P/p3K3/1b3N1r/8 b - - 11 61",
"8/8/R4pk1/7p/7P/p1b1K3/5N1r/8 b - - 13 62",
"8/8/5pk1/7p/7P/2b1K3/R4N1r/8 b - - 0 63",
"8/8/5pk1/7p/3K3P/8/R4N1r/4b3 b - - 2 64",
}
};
// clang-format on
} // namespace } // namespace
namespace Stockfish::Benchmark { namespace Stockfish::Benchmark {
@@ -160,4 +437,76 @@ std::vector<std::string> setup_bench(const std::string& currentFen, std::istream
return list; return list;
} }
BenchmarkSetup setup_benchmark(std::istream& is) {
// TT_SIZE_PER_THREAD is chosen such that roughly half of the hash is used by the time all
// positions for the current sequence have been searched.
static constexpr int TT_SIZE_PER_THREAD = 128;
static constexpr int DEFAULT_DURATION_S = 150;
BenchmarkSetup setup{};
// Assign default values to missing arguments
int desiredTimeS;
if (!(is >> setup.threads))
setup.threads = get_hardware_concurrency();
else
setup.originalInvocation += std::to_string(setup.threads);
if (!(is >> setup.ttSize))
setup.ttSize = TT_SIZE_PER_THREAD * setup.threads;
else
setup.originalInvocation += " " + std::to_string(setup.ttSize);
if (!(is >> desiredTimeS))
desiredTimeS = DEFAULT_DURATION_S;
else
setup.originalInvocation += " " + std::to_string(desiredTimeS);
setup.filledInvocation += std::to_string(setup.threads) + " " + std::to_string(setup.ttSize)
+ " " + std::to_string(desiredTimeS);
auto getCorrectedTime = [&](int ply) {
// time per move is a rough fit based on LTC games
// seconds = 50/{ply+15}
// ms = 50000/{ply+15}
// with this fit 10th move gets 2000ms
// adjust for desired 10th move time
return 50000.0 / (static_cast<double>(ply) + 15.0);
};
float totalTime = 0;
for (const auto& game : BenchmarkPositions)
{
setup.commands.emplace_back("ucinewgame");
int ply = 1;
for (int i = 0; i < static_cast<int>(game.size()); ++i)
{
const float correctedTime = getCorrectedTime(ply);
totalTime += correctedTime;
ply += 1;
}
}
float timeScaleFactor = static_cast<float>(desiredTimeS * 1000) / totalTime;
for (const auto& game : BenchmarkPositions)
{
setup.commands.emplace_back("ucinewgame");
int ply = 1;
for (const std::string& fen : game)
{
setup.commands.emplace_back("position fen " + fen);
const int correctedTime = static_cast<int>(getCorrectedTime(ply) * timeScaleFactor);
setup.commands.emplace_back("go movetime " + std::to_string(correctedTime));
ply += 1;
}
}
return setup;
}
} // namespace Stockfish } // namespace Stockfish
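As an aside, the time-allocation math above can be replayed in isolation. The sketch below reuses the 50000/(ply+15) fit with a hypothetical three-position game and a 10-second budget; it is only an illustration of the scaling step, not engine code.

#include <iostream>
#include <vector>

int main() {
    // Same fit as in setup_benchmark(): milliseconds per move = 50000 / (ply + 15)
    auto correctedTimeMs = [](int ply) { return 50000.0 / (ply + 15.0); };

    const std::vector<int> plies = {1, 2, 3};  // hypothetical 3-position game
    const int desiredTimeS = 10;               // hypothetical total budget

    double total = 0;
    for (int ply : plies)
        total += correctedTimeMs(ply);

    const double scale = desiredTimeS * 1000.0 / total;
    for (int ply : plies)
        std::cout << "ply " << ply << ": go movetime "
                  << static_cast<int>(correctedTimeMs(ply) * scale) << "\n";

    // With the unscaled fit, ply 10 would get 50000 / 25 = 2000 ms, as the comment above notes.
}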
@@ -27,6 +27,16 @@ namespace Stockfish::Benchmark {
std::vector<std::string> setup_bench(const std::string&, std::istream&); std::vector<std::string> setup_bench(const std::string&, std::istream&);
struct BenchmarkSetup {
int ttSize;
int threads;
std::vector<std::string> commands;
std::string originalInvocation;
std::string filledInvocation;
};
BenchmarkSetup setup_benchmark(std::istream&);
} // namespace Stockfish } // namespace Stockfish
#endif // #ifndef BENCHMARK_H_INCLUDED #endif // #ifndef BENCHMARK_H_INCLUDED
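A minimal sketch of how this new entry point might be driven. The argument string "4 512 60" is a hypothetical "threads ttSize seconds" invocation, and the snippet assumes this header is included; it is not part of the engine.

#include <iostream>
#include <sstream>
#include <string>

#include "benchmark.h"

void run_benchmark_sketch() {
    std::istringstream args("4 512 60");  // hypothetical: 4 threads, 512 MB hash, 60 s
    Stockfish::Benchmark::BenchmarkSetup setup = Stockfish::Benchmark::setup_benchmark(args);

    std::cout << "threads: " << setup.threads << ", hash: " << setup.ttSize << " MB\n";
    for (const std::string& cmd : setup.commands)
        std::cout << cmd << "\n";  // "ucinewgame", "position fen ...", "go movetime ..."
}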
@@ -34,15 +34,14 @@ Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
Bitboard PawnAttacks[COLOR_NB][SQUARE_NB]; Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];
Magic RookMagics[SQUARE_NB]; alignas(64) Magic Magics[SQUARE_NB][2];
Magic BishopMagics[SQUARE_NB];
namespace { namespace {
Bitboard RookTable[0x19000]; // To store rook attacks Bitboard RookTable[0x19000]; // To store rook attacks
Bitboard BishopTable[0x1480]; // To store bishop attacks Bitboard BishopTable[0x1480]; // To store bishop attacks
void init_magics(PieceType pt, Bitboard table[], Magic magics[]); void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]);
// Returns the bitboard of target square for the given step // Returns the bitboard of target square for the given step
// from the given square. If the step is off the board, returns empty bitboard. // from the given square. If the step is off the board, returns empty bitboard.
@@ -82,8 +81,8 @@ void Bitboards::init() {
for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
SquareDistance[s1][s2] = std::max(distance<File>(s1, s2), distance<Rank>(s1, s2)); SquareDistance[s1][s2] = std::max(distance<File>(s1, s2), distance<Rank>(s1, s2));
init_magics(ROOK, RookTable, RookMagics); init_magics(ROOK, RookTable, Magics);
init_magics(BISHOP, BishopTable, BishopMagics); init_magics(BISHOP, BishopTable, Magics);
for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
{ {
@@ -124,8 +123,14 @@ Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) {
for (Direction d : (pt == ROOK ? RookDirections : BishopDirections)) for (Direction d : (pt == ROOK ? RookDirections : BishopDirections))
{ {
Square s = sq; Square s = sq;
while (safe_destination(s, d) && !(occupied & s)) while (safe_destination(s, d))
{
attacks |= (s += d); attacks |= (s += d);
if (occupied & s)
{
break;
}
}
} }
return attacks; return attacks;
@@ -134,41 +139,49 @@ Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) {
// Computes all rook and bishop attacks at startup. Magic // Computes all rook and bishop attacks at startup. Magic
// bitboards are used to look up attacks of sliding pieces. As a reference see // bitboards are used to look up attacks of sliding pieces. As a reference see
// www.chessprogramming.org/Magic_Bitboards. In particular, here we use the so // https://www.chessprogramming.org/Magic_Bitboards. In particular, here we use
// called "fancy" approach. // the so called "fancy" approach.
void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]) {
#ifndef USE_PEXT
// Optimal PRNG seeds to pick the correct magics in the shortest time // Optimal PRNG seeds to pick the correct magics in the shortest time
int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020}, int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020},
{728, 10316, 55013, 32803, 12281, 15100, 16645, 255}}; {728, 10316, 55013, 32803, 12281, 15100, 16645, 255}};
Bitboard occupancy[4096], reference[4096], edges, b; Bitboard occupancy[4096];
int epoch[4096] = {}, cnt = 0, size = 0; int epoch[4096] = {}, cnt = 0;
#endif
Bitboard reference[4096];
int size = 0;
for (Square s = SQ_A1; s <= SQ_H8; ++s) for (Square s = SQ_A1; s <= SQ_H8; ++s)
{ {
// Board edges are not considered in the relevant occupancies // Board edges are not considered in the relevant occupancies
edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s)); Bitboard edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s));
// Given a square 's', the mask is the bitboard of sliding attacks from // Given a square 's', the mask is the bitboard of sliding attacks from
// 's' computed on an empty board. The index must be big enough to contain // 's' computed on an empty board. The index must be big enough to contain
// all the attacks for each possible subset of the mask and so is 2 power // all the attacks for each possible subset of the mask and so is 2 power
// the number of 1s of the mask. Hence we deduce the size of the shift to // the number of 1s of the mask. Hence we deduce the size of the shift to
// apply to the 64 or 32 bits word to get the index. // apply to the 64 or 32 bits word to get the index.
Magic& m = magics[s]; Magic& m = magics[s][pt - BISHOP];
m.mask = sliding_attack(pt, s, 0) & ~edges; m.mask = sliding_attack(pt, s, 0) & ~edges;
m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask); #ifndef USE_PEXT
m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask);
#endif
// Set the offset for the attacks table of the square. We have individual // Set the offset for the attacks table of the square. We have individual
// table sizes for each square with "Fancy Magic Bitboards". // table sizes for each square with "Fancy Magic Bitboards".
m.attacks = s == SQ_A1 ? table : magics[s - 1].attacks + size; m.attacks = s == SQ_A1 ? table : magics[s - 1][pt - BISHOP].attacks + size;
size = 0;
// Use Carry-Rippler trick to enumerate all subsets of masks[s] and // Use Carry-Rippler trick to enumerate all subsets of masks[s] and
// store the corresponding sliding attack bitboard in reference[]. // store the corresponding sliding attack bitboard in reference[].
b = size = 0; Bitboard b = 0;
do do
{ {
#ifndef USE_PEXT
occupancy[size] = b; occupancy[size] = b;
#endif
reference[size] = sliding_attack(pt, s, b); reference[size] = sliding_attack(pt, s, b);
if (HasPext) if (HasPext)
@@ -178,9 +191,7 @@ void init_magics(PieceType pt, Bitboard table[], Magic magics[]) {
b = (b - m.mask) & m.mask; b = (b - m.mask) & m.mask;
} while (b); } while (b);
if (HasPext) #ifndef USE_PEXT
continue;
PRNG rng(seeds[Is64Bit][rank_of(s)]); PRNG rng(seeds[Is64Bit][rank_of(s)]);
// Find a magic for square 's' picking up an (almost) random number // Find a magic for square 's' picking up an (almost) random number
@@ -209,6 +220,7 @@ void init_magics(PieceType pt, Bitboard table[], Magic magics[]) {
break; break;
} }
} }
#endif
} }
} }
} }
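As a side note, the Carry-Rippler trick used above ("b = (b - mask) & mask") can be exercised on its own. The mask below is arbitrary and the snippet is only an illustration, not engine code.

#include <cstdint>
#include <iostream>

int main() {
    using Bitboard = std::uint64_t;
    const Bitboard mask = 0x0000000000010106ULL;  // arbitrary example mask with 4 set bits

    // Carry-Rippler: b = (b - mask) & mask visits all 2^popcount(mask) subsets, ending back at 0.
    Bitboard b = 0;
    int count = 0;
    do
    {
        ++count;                    // each iteration sees one subset of 'mask' in 'b'
        b = (b - mask) & mask;
    } while (b);

    std::cout << "subsets enumerated: " << count << "\n";  // prints 16 for a 4-bit mask
}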
@@ -67,27 +67,31 @@ extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];
// Magic holds all magic bitboards relevant data for a single square // Magic holds all magic bitboards relevant data for a single square
struct Magic { struct Magic {
Bitboard mask; Bitboard mask;
Bitboard magic;
Bitboard* attacks; Bitboard* attacks;
unsigned shift; #ifndef USE_PEXT
Bitboard magic;
unsigned shift;
#endif
// Compute the attack's index using the 'magic bitboards' approach // Compute the attack's index using the 'magic bitboards' approach
unsigned index(Bitboard occupied) const { unsigned index(Bitboard occupied) const {
if (HasPext) #ifdef USE_PEXT
return unsigned(pext(occupied, mask)); return unsigned(pext(occupied, mask));
#else
if (Is64Bit) if (Is64Bit)
return unsigned(((occupied & mask) * magic) >> shift); return unsigned(((occupied & mask) * magic) >> shift);
unsigned lo = unsigned(occupied) & unsigned(mask); unsigned lo = unsigned(occupied) & unsigned(mask);
unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32); unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32);
return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift; return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift;
#endif
} }
Bitboard attacks_bb(Bitboard occupied) const { return attacks[index(occupied)]; }
}; };
extern Magic RookMagics[SQUARE_NB]; extern Magic Magics[SQUARE_NB][2];
extern Magic BishopMagics[SQUARE_NB];
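To see how a masked occupancy collapses into a small per-square table index, here is a standalone software-pext sketch. The real code uses the hardware pext instruction (or the magic multiplication in the non-PEXT branch); the mask and occupancy values below are arbitrary.

#include <cstdint>
#include <iostream>

// Minimal software pext: gathers the bits of 'occupied' selected by 'mask' into the low bits.
std::uint64_t soft_pext(std::uint64_t occupied, std::uint64_t mask) {
    std::uint64_t result = 0;
    for (std::uint64_t bit = 1; mask; mask &= mask - 1, bit <<= 1)
        if (occupied & (mask & -mask))
            result |= bit;
    return result;
}

int main() {
    // Hypothetical relevant-occupancy mask with 3 bits -> indices 0..7
    std::uint64_t mask     = 0x0000000001010100ULL;
    std::uint64_t occupied = 0x0000000001000100ULL;
    std::cout << "index = " << soft_pext(occupied, mask) << "\n";  // prints 5
}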
constexpr Bitboard square_bb(Square s) { constexpr Bitboard square_bb(Square s) {
assert(is_ok(s)); assert(is_ok(s));
@@ -229,9 +233,8 @@ inline Bitboard attacks_bb(Square s, Bitboard occupied) {
switch (Pt) switch (Pt)
{ {
case BISHOP : case BISHOP :
return BishopMagics[s].attacks[BishopMagics[s].index(occupied)];
case ROOK : case ROOK :
return RookMagics[s].attacks[RookMagics[s].index(occupied)]; return Magics[s][Pt - BISHOP].attacks_bb(occupied);
case QUEEN : case QUEEN :
return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied); return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
default : default :
@@ -18,15 +18,15 @@
#include "engine.h" #include "engine.h"
#include <cassert>
#include <deque> #include <deque>
#include <iosfwd>
#include <memory> #include <memory>
#include <ostream> #include <ostream>
#include <sstream>
#include <string_view> #include <string_view>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <sstream>
#include <iosfwd>
#include <cassert>
#include "evaluate.h" #include "evaluate.h"
#include "misc.h" #include "misc.h"
@@ -44,16 +44,76 @@ namespace Stockfish {
namespace NN = Eval::NNUE; namespace NN = Eval::NNUE;
constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;
Engine::Engine(std::string path) : Engine::Engine(std::optional<std::string> path) :
binaryDirectory(CommandLine::get_binary_directory(path)), binaryDirectory(path ? CommandLine::get_binary_directory(*path) : ""),
numaContext(NumaConfig::from_system()),
states(new std::deque<StateInfo>(1)), states(new std::deque<StateInfo>(1)),
networks(NN::Networks( threads(),
NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG), networks(
NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) { numaContext,
NN::Networks(
NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG),
NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) {
pos.set(StartFEN, false, &states->back()); pos.set(StartFEN, false, &states->back());
capSq = SQ_NONE; capSq = SQ_NONE;
options["Debug Log File"] << Option("", [](const Option& o) {
start_logger(o);
return std::nullopt;
});
options["NumaPolicy"] << Option("auto", [this](const Option& o) {
set_numa_config_from_option(o);
return numa_config_information_as_string() + "\n"
+ thread_allocation_information_as_string();
});
options["Threads"] << Option(1, 1, 1024, [this](const Option&) {
resize_threads();
return thread_allocation_information_as_string();
});
options["Hash"] << Option(16, 1, MaxHashMB, [this](const Option& o) {
set_tt_size(o);
return std::nullopt;
});
options["Clear Hash"] << Option([this](const Option&) {
search_clear();
return std::nullopt;
});
options["Ponder"] << Option(false);
options["MultiPV"] << Option(1, 1, MAX_MOVES);
options["Skill Level"] << Option(20, 0, 20);
options["Move Overhead"] << Option(10, 0, 5000);
options["nodestime"] << Option(0, 0, 10000);
options["UCI_Chess960"] << Option(false);
options["UCI_LimitStrength"] << Option(false);
options["UCI_Elo"] << Option(Stockfish::Search::Skill::LowestElo,
Stockfish::Search::Skill::LowestElo,
Stockfish::Search::Skill::HighestElo);
options["UCI_ShowWDL"] << Option(false);
options["SyzygyPath"] << Option("", [](const Option& o) {
Tablebases::init(o);
return std::nullopt;
});
options["SyzygyProbeDepth"] << Option(1, 1, 100);
options["Syzygy50MoveRule"] << Option(true);
options["SyzygyProbeLimit"] << Option(7, 0, 7);
options["EvalFile"] << Option(EvalFileDefaultNameBig, [this](const Option& o) {
load_big_network(o);
return std::nullopt;
});
options["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, [this](const Option& o) {
load_small_network(o);
return std::nullopt;
});
load_networks();
resize_threads();
} }
std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) { std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) {
@@ -74,7 +134,7 @@ void Engine::stop() { threads.stop = true; }
void Engine::search_clear() { void Engine::search_clear() {
wait_for_search_finished(); wait_for_search_finished();
tt.clear(options["Threads"]); tt.clear(threads);
threads.clear(); threads.clear();
// @TODO wont work with multiple instances // @TODO wont work with multiple instances
@@ -97,6 +157,10 @@ void Engine::set_on_bestmove(std::function<void(std::string_view, std::string_vi
updateContext.onBestmove = std::move(f); updateContext.onBestmove = std::move(f);
} }
void Engine::set_on_verify_networks(std::function<void(std::string_view)>&& f) {
onVerifyNetworks = std::move(f);
}
void Engine::wait_for_search_finished() { threads.main_thread()->wait_for_search_finished(); } void Engine::wait_for_search_finished() { threads.main_thread()->wait_for_search_finished(); }
void Engine::set_position(const std::string& fen, const std::vector<std::string>& moves) { void Engine::set_position(const std::string& fen, const std::vector<std::string>& moves) {
@@ -124,11 +188,42 @@ void Engine::set_position(const std::string& fen, const std::vector<std::string>
// modifiers // modifiers
void Engine::resize_threads() { threads.set({options, threads, tt, networks}, updateContext); } void Engine::set_numa_config_from_option(const std::string& o) {
if (o == "auto" || o == "system")
{
numaContext.set_numa_config(NumaConfig::from_system());
}
else if (o == "hardware")
{
// Don't respect affinity set in the system.
numaContext.set_numa_config(NumaConfig::from_system(false));
}
else if (o == "none")
{
numaContext.set_numa_config(NumaConfig{});
}
else
{
numaContext.set_numa_config(NumaConfig::from_string(o));
}
// Force reallocation of threads in case affinities need to change.
resize_threads();
threads.ensure_network_replicated();
}
void Engine::resize_threads() {
threads.wait_for_search_finished();
threads.set(numaContext.get_numa_config(), {options, threads, tt, networks}, updateContext);
// Reallocate the hash with the new threadpool size
set_tt_size(options["Hash"]);
threads.ensure_network_replicated();
}
void Engine::set_tt_size(size_t mb) { void Engine::set_tt_size(size_t mb) {
wait_for_search_finished(); wait_for_search_finished();
tt.resize(mb, options["Threads"]); tt.resize(mb, threads);
} }
void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; } void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; }
@@ -136,28 +231,38 @@ void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; }
// network related // network related
void Engine::verify_networks() const { void Engine::verify_networks() const {
networks.big.verify(options["EvalFile"]); networks->big.verify(options["EvalFile"], onVerifyNetworks);
networks.small.verify(options["EvalFileSmall"]); networks->small.verify(options["EvalFileSmall"], onVerifyNetworks);
} }
void Engine::load_networks() { void Engine::load_networks() {
load_big_network(options["EvalFile"]); networks.modify_and_replicate([this](NN::Networks& networks_) {
load_small_network(options["EvalFileSmall"]); networks_.big.load(binaryDirectory, options["EvalFile"]);
networks_.small.load(binaryDirectory, options["EvalFileSmall"]);
});
threads.clear();
threads.ensure_network_replicated();
} }
void Engine::load_big_network(const std::string& file) { void Engine::load_big_network(const std::string& file) {
networks.big.load(binaryDirectory, file); networks.modify_and_replicate(
[this, &file](NN::Networks& networks_) { networks_.big.load(binaryDirectory, file); });
threads.clear(); threads.clear();
threads.ensure_network_replicated();
} }
void Engine::load_small_network(const std::string& file) { void Engine::load_small_network(const std::string& file) {
networks.small.load(binaryDirectory, file); networks.modify_and_replicate(
[this, &file](NN::Networks& networks_) { networks_.small.load(binaryDirectory, file); });
threads.clear(); threads.clear();
threads.ensure_network_replicated();
} }
void Engine::save_network(const std::pair<std::optional<std::string>, std::string> files[2]) { void Engine::save_network(const std::pair<std::optional<std::string>, std::string> files[2]) {
networks.big.save(files[0].first); networks.modify_and_replicate([&files](NN::Networks& networks_) {
networks.small.save(files[1].first); networks_.big.save(files[0].first);
networks_.small.save(files[1].first);
});
} }
// utility functions // utility functions
@@ -169,10 +274,11 @@ void Engine::trace_eval() const {
verify_networks(); verify_networks();
sync_cout << "\n" << Eval::trace(p, networks) << sync_endl; sync_cout << "\n" << Eval::trace(p, *networks) << sync_endl;
} }
OptionsMap& Engine::get_options() { return options; } const OptionsMap& Engine::get_options() const { return options; }
OptionsMap& Engine::get_options() { return options; }
std::string Engine::fen() const { return pos.fen(); } std::string Engine::fen() const { return pos.fen(); }
@@ -184,4 +290,63 @@ std::string Engine::visualize() const {
return ss.str(); return ss.str();
} }
int Engine::get_hashfull(int maxAge) const { return tt.hashfull(maxAge); }
std::vector<std::pair<size_t, size_t>> Engine::get_bound_thread_count_by_numa_node() const {
auto counts = threads.get_bound_thread_count_by_numa_node();
const NumaConfig& cfg = numaContext.get_numa_config();
std::vector<std::pair<size_t, size_t>> ratios;
NumaIndex n = 0;
for (; n < counts.size(); ++n)
ratios.emplace_back(counts[n], cfg.num_cpus_in_numa_node(n));
if (!counts.empty())
for (; n < cfg.num_numa_nodes(); ++n)
ratios.emplace_back(0, cfg.num_cpus_in_numa_node(n));
return ratios;
}
std::string Engine::get_numa_config_as_string() const {
return numaContext.get_numa_config().to_string();
}
std::string Engine::numa_config_information_as_string() const {
auto cfgStr = get_numa_config_as_string();
return "Available processors: " + cfgStr;
}
std::string Engine::thread_binding_information_as_string() const {
auto boundThreadsByNode = get_bound_thread_count_by_numa_node();
std::stringstream ss;
if (boundThreadsByNode.empty())
return ss.str();
bool isFirst = true;
for (auto&& [current, total] : boundThreadsByNode)
{
if (!isFirst)
ss << ":";
ss << current << "/" << total;
isFirst = false;
}
return ss.str();
}
std::string Engine::thread_allocation_information_as_string() const {
std::stringstream ss;
size_t threadsSize = threads.size();
ss << "Using " << threadsSize << (threadsSize > 1 ? " threads" : " thread");
auto boundThreadsByNodeStr = thread_binding_information_as_string();
if (boundThreadsByNodeStr.empty())
return ss.str();
ss << " with NUMA node thread binding: ";
ss << boundThreadsByNodeStr;
return ss.str();
}
} }
@@ -29,6 +29,7 @@
#include <vector> #include <vector>
#include "nnue/network.h" #include "nnue/network.h"
#include "numa.h"
#include "position.h" #include "position.h"
#include "search.h" #include "search.h"
#include "syzygy/tbprobe.h" // for Stockfish::Depth #include "syzygy/tbprobe.h" // for Stockfish::Depth
@@ -46,7 +47,14 @@ class Engine {
using InfoFull = Search::InfoFull; using InfoFull = Search::InfoFull;
using InfoIter = Search::InfoIteration; using InfoIter = Search::InfoIteration;
Engine(std::string path = ""); Engine(std::optional<std::string> path = std::nullopt);
// Cannot be movable due to components holding backreferences to fields
Engine(const Engine&) = delete;
Engine(Engine&&) = delete;
Engine& operator=(const Engine&) = delete;
Engine& operator=(Engine&&) = delete;
~Engine() { wait_for_search_finished(); } ~Engine() { wait_for_search_finished(); }
std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960); std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960);
@@ -63,6 +71,7 @@ class Engine {
// modifiers // modifiers
void set_numa_config_from_option(const std::string& o);
void resize_threads(); void resize_threads();
void set_tt_size(size_t mb); void set_tt_size(size_t mb);
void set_ponderhit(bool); void set_ponderhit(bool);
@@ -72,6 +81,7 @@ class Engine {
void set_on_update_full(std::function<void(const InfoFull&)>&&); void set_on_update_full(std::function<void(const InfoFull&)>&&);
void set_on_iter(std::function<void(const InfoIter&)>&&); void set_on_iter(std::function<void(const InfoIter&)>&&);
void set_on_bestmove(std::function<void(std::string_view, std::string_view)>&&); void set_on_bestmove(std::function<void(std::string_view, std::string_view)>&&);
void set_on_verify_networks(std::function<void(std::string_view)>&&);
// network related // network related
@@ -83,25 +93,38 @@ class Engine {
// utility functions // utility functions
void trace_eval() const; void trace_eval() const;
OptionsMap& get_options();
std::string fen() const; const OptionsMap& get_options() const;
void flip(); OptionsMap& get_options();
std::string visualize() const;
int get_hashfull(int maxAge = 0) const;
std::string fen() const;
void flip();
std::string visualize() const;
std::vector<std::pair<size_t, size_t>> get_bound_thread_count_by_numa_node() const;
std::string get_numa_config_as_string() const;
std::string numa_config_information_as_string() const;
std::string thread_allocation_information_as_string() const;
std::string thread_binding_information_as_string() const;
private: private:
const std::string binaryDirectory; const std::string binaryDirectory;
NumaReplicationContext numaContext;
Position pos; Position pos;
StateListPtr states; StateListPtr states;
Square capSq; Square capSq;
OptionsMap options; OptionsMap options;
ThreadPool threads; ThreadPool threads;
TranspositionTable tt; TranspositionTable tt;
Eval::NNUE::Networks networks; LazyNumaReplicated<Eval::NNUE::Networks> networks;
Search::SearchManager::UpdateContext updateContext; Search::SearchManager::UpdateContext updateContext;
std::function<void(std::string_view)> onVerifyNetworks;
}; };
} // namespace Stockfish } // namespace Stockfish
@@ -24,8 +24,9 @@
#include <cstdlib> #include <cstdlib>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <sstream>
#include <memory> #include <memory>
#include <sstream>
#include <tuple>
#include "nnue/network.h" #include "nnue/network.h"
#include "nnue/nnue_misc.h" #include "nnue/nnue_misc.h"
@@ -44,6 +45,10 @@ int Eval::simple_eval(const Position& pos, Color c) {
+ (pos.non_pawn_material(c) - pos.non_pawn_material(~c)); + (pos.non_pawn_material(c) - pos.non_pawn_material(~c));
} }
bool Eval::use_smallnet(const Position& pos) {
int simpleEval = simple_eval(pos, pos.side_to_move());
return std::abs(simpleEval) > 962;
}
// Evaluate is the evaluator for the outer world. It returns a static evaluation // Evaluate is the evaluator for the outer world. It returns a static evaluation
// of the position from the point of view of the side to move. // of the position from the point of view of the side to move.
@@ -54,34 +59,30 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks,
assert(!pos.checkers()); assert(!pos.checkers());
int simpleEval = simple_eval(pos, pos.side_to_move()); bool smallNet = use_smallnet(pos);
bool smallNet = std::abs(simpleEval) > SmallNetThreshold; auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, &caches.small)
int nnueComplexity; : networks.big.evaluate(pos, &caches.big);
int v;
Value nnue = smallNet ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity) Value nnue = (125 * psqt + 131 * positional) / 128;
: networks.big.evaluate(pos, &caches.big, true, &nnueComplexity);
const auto adjustEval = [&](int nnueDiv, int pawnCountConstant, int pawnCountMul, // Re-evaluate the position when higher eval accuracy is worth the time spent
int npmConstant, int evalDiv, int shufflingConstant) { if (smallNet && (std::abs(nnue) < 236))
// Blend optimism and eval with nnue complexity and material imbalance {
optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big);
nnue -= nnue * (nnueComplexity * 5 / 3) / nnueDiv; nnue = (125 * psqt + 131 * positional) / 128;
smallNet = false;
}
int npm = pos.non_pawn_material() / 64; // Blend optimism and eval with nnue complexity
v = (nnue * (npm + pawnCountConstant + pawnCountMul * pos.count<PAWN>()) int nnueComplexity = std::abs(psqt - positional);
+ optimism * (npmConstant + npm)) optimism += optimism * nnueComplexity / 468;
/ evalDiv; nnue -= nnue * nnueComplexity / (smallNet ? 20233 : 17879);
// Damp down the evaluation linearly when shuffling int material = (smallNet ? 553 : 532) * pos.count<PAWN>() + pos.non_pawn_material();
int shuffling = pos.rule50_count(); int v = (nnue * (77777 + material) + optimism * (7777 + material)) / 77777;
v = v * (shufflingConstant - shuffling) / 207;
};
if (!smallNet) // Damp down the evaluation linearly when shuffling
adjustEval(32395, 942, 11, 139, 1058, 178); v -= v * pos.rule50_count() / 212;
else
adjustEval(32793, 944, 9, 140, 1067, 206);
// Guarantee evaluation does not hit the tablebase range // Guarantee evaluation does not hit the tablebase range
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
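For intuition, the blending arithmetic above can be replayed outside the engine. In this sketch the constants are the ones from the diff, while psqt, positional, optimism, the pawn count, the non-pawn material and the rule-50 counter are made-up inputs; it skips the small-net re-evaluation step.

#include <cstdlib>
#include <iostream>

int main() {
    // Hypothetical raw network outputs and search optimism (internal eval units)
    int  psqt = 120, positional = 80, optimism = 30;
    int  pawnCount = 12, nonPawnMaterial = 5000, rule50 = 20;
    bool smallNet = false;

    int nnue = (125 * psqt + 131 * positional) / 128;

    // Blend optimism and eval with nnue complexity (constants copied from the diff above)
    int nnueComplexity = std::abs(psqt - positional);
    optimism += optimism * nnueComplexity / 468;
    nnue -= nnue * nnueComplexity / (smallNet ? 20233 : 17879);

    int material = (smallNet ? 553 : 532) * pawnCount + nonPawnMaterial;
    int v        = (nnue * (77777 + material) + optimism * (7777 + material)) / 77777;

    // Damp down the evaluation linearly when shuffling
    v -= v * rule50 / 212;

    std::cout << "blended eval: " << v << "\n";
}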
@@ -108,8 +109,9 @@ std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
Value v = networks.big.evaluate(pos, &caches->big, false); auto [psqt, positional] = networks.big.evaluate(pos, &caches->big);
v = pos.side_to_move() == WHITE ? v : -v; Value v = psqt + positional;
v = pos.side_to_move() == WHITE ? v : -v;
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n"; ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";
v = evaluate(networks, pos, *caches, VALUE_ZERO); v = evaluate(networks, pos, *caches, VALUE_ZERO);
@@ -29,14 +29,12 @@ class Position;
namespace Eval { namespace Eval {
constexpr inline int SmallNetThreshold = 1274;
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the // for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro or the location where this macro is defined, as it is used // name of the macro or the location where this macro is defined, as it is used
// in the Makefile/Fishtest. // in the Makefile/Fishtest.
#define EvalFileDefaultNameBig "nn-ae6a388e4a1a.nnue" #define EvalFileDefaultNameBig "nn-1c0000000000.nnue"
#define EvalFileDefaultNameSmall "nn-baff1ede1f90.nnue" #define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue"
namespace NNUE { namespace NNUE {
struct Networks; struct Networks;
@@ -46,6 +44,7 @@ struct AccumulatorCaches;
std::string trace(Position& pos, const Eval::NNUE::Networks& networks); std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
int simple_eval(const Position& pos, Color c); int simple_eval(const Position& pos, Color c);
bool use_smallnet(const Position& pos);
Value evaluate(const NNUE::Networks& networks, Value evaluate(const NNUE::Networks& networks,
const Position& pos, const Position& pos,
Eval::NNUE::AccumulatorCaches& caches, Eval::NNUE::AccumulatorCaches& caches,
src/history.h (new file, 185 lines)
@@ -0,0 +1,185 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HISTORY_H_INCLUDED
#define HISTORY_H_INCLUDED
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <type_traits> // IWYU pragma: keep
#include "position.h"
namespace Stockfish {
constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2
constexpr int CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2
constexpr int CORRECTION_HISTORY_LIMIT = 1024;
constexpr int LOW_PLY_HISTORY_SIZE = 4;
static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0,
"PAWN_HISTORY_SIZE has to be a power of 2");
static_assert((CORRECTION_HISTORY_SIZE & (CORRECTION_HISTORY_SIZE - 1)) == 0,
"CORRECTION_HISTORY_SIZE has to be a power of 2");
enum PawnHistoryType {
Normal,
Correction
};
template<PawnHistoryType T = Normal>
inline int pawn_structure_index(const Position& pos) {
return pos.pawn_key() & ((T == Normal ? PAWN_HISTORY_SIZE : CORRECTION_HISTORY_SIZE) - 1);
}
inline int major_piece_index(const Position& pos) {
return pos.major_piece_key() & (CORRECTION_HISTORY_SIZE - 1);
}
inline int minor_piece_index(const Position& pos) {
return pos.minor_piece_key() & (CORRECTION_HISTORY_SIZE - 1);
}
template<Color c>
inline int non_pawn_index(const Position& pos) {
return pos.non_pawn_key(c) & (CORRECTION_HISTORY_SIZE - 1);
}
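A quick illustration of why these table sizes must be powers of two: masking the key with (SIZE - 1) is then equivalent to a cheap modulo. The key below is a made-up value standing in for a Zobrist-style hash.

#include <cstdint>
#include <iostream>

int main() {
    constexpr int CORRECTION_HISTORY_SIZE = 32768;       // power of 2, as asserted above
    std::uint64_t pawnKey = 0x9e3779b97f4a7c15ULL;       // hypothetical pawn hash key

    // key & (SIZE - 1) == key % SIZE only because SIZE is a power of two
    int index = int(pawnKey & (CORRECTION_HISTORY_SIZE - 1));
    std::cout << index << " == " << pawnKey % CORRECTION_HISTORY_SIZE << "\n";
}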
// StatsEntry stores the stat table value. It is usually a number but could
// be a move or even a nested history. We use a class instead of a naked value
// to directly call the history update operator<<() on the entry, so as to use stats
// tables at caller sites as simple multi-dim arrays.
template<typename T, int D>
class StatsEntry {
T entry;
public:
void operator=(const T& v) { entry = v; }
T* operator&() { return &entry; }
T* operator->() { return &entry; }
operator const T&() const { return entry; }
void operator<<(int bonus) {
static_assert(D <= std::numeric_limits<T>::max(), "D overflows T");
// Make sure that bonus is in range [-D, D]
int clampedBonus = std::clamp(bonus, -D, D);
entry += clampedBonus - entry * std::abs(clampedBonus) / D;
assert(std::abs(entry) <= D);
}
};
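The '<<' update above acts like a bounded, self-damping accumulator. A standalone sketch with D = 7183 (the bound used for ButterflyHistory below) and made-up bonuses shows it saturating towards D instead of overflowing.

#include <algorithm>
#include <cmath>
#include <iostream>

int main() {
    const int D     = 7183;
    int       entry = 0;

    // Same update rule as StatsEntry::operator<< above
    auto update = [&](int bonus) {
        int clamped = std::clamp(bonus, -D, D);
        entry += clamped - entry * std::abs(clamped) / D;
    };

    // Repeated positive bonuses saturate below D instead of overflowing
    for (int i = 0; i < 10; ++i)
        update(2000);
    std::cout << "after 10 x +2000: " << entry << "\n";

    update(-4000);  // a failure pulls the value back down
    std::cout << "after one -4000: " << entry << "\n";
}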
// Stats is a generic N-dimensional array used to store various statistics.
// The first template parameter T is the base type of the array, and the second
// template parameter D limits the range of updates in [-D, D] when we update
// values with the << operator, while the last parameters (Size and Sizes)
// encode the dimensions of the array.
template<typename T, int D, int Size, int... Sizes>
struct Stats: public std::array<Stats<T, D, Sizes...>, Size> {
using stats = Stats<T, D, Size, Sizes...>;
void fill(const T& v) {
// For standard-layout 'this' points to the first struct member
assert(std::is_standard_layout_v<stats>);
using entry = StatsEntry<T, D>;
entry* p = reinterpret_cast<entry*>(this);
std::fill(p, p + sizeof(*this) / sizeof(entry), v);
}
};
template<typename T, int D, int Size>
struct Stats<T, D, Size>: public std::array<StatsEntry<T, D>, Size> {};
// In stats table, D=0 means that the template parameter is not used
enum StatsParams {
NOT_USED = 0
};
enum StatsType {
NoCaptures,
Captures
};
// ButterflyHistory records how often quiet moves have been successful or unsuccessful
// during the current search, and is used for reduction and move ordering decisions.
// It uses 2 tables (one for each color) indexed by the move's from and to squares,
// see https://www.chessprogramming.org/Butterfly_Boards (~11 elo)
using ButterflyHistory = Stats<int16_t, 7183, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)>;
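A sketch of how such a table is typically read and updated, assuming this header is available in the build; the packed from/to index is written out by hand here for illustration and is not the engine's own move-encoding helper.

#include "history.h"

namespace Stockfish {

void history_usage_example(ButterflyHistory& mainHistory) {
    Color us     = WHITE;
    int   fromTo = 12 * 64 + 28;  // hypothetical packed from/to squares (e2 -> e4)

    int current = mainHistory[us][fromTo];  // plain multi-dimensional read
    mainHistory[us][fromTo] << 1000;        // bounded update towards +7183 (the D parameter)
    (void) current;
}

}  // namespace Stockfish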
// LowPlyHistory is addressed by ply and the move's from and to squares, used
// to improve move ordering near the root
using LowPlyHistory = Stats<int16_t, 7183, LOW_PLY_HISTORY_SIZE, int(SQUARE_NB) * int(SQUARE_NB)>;
// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
using CapturePieceToHistory = Stats<int16_t, 10692, PIECE_NB, SQUARE_NB, PIECE_TYPE_NB>;
// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to]
using PieceToHistory = Stats<int16_t, 29952, PIECE_NB, SQUARE_NB>;
// ContinuationHistory is the combined history of a given pair of moves, usually
// the current one given a previous one. The nested history table is based on
// PieceToHistory instead of ButterflyBoards.
// (~63 elo)
using ContinuationHistory = Stats<PieceToHistory, NOT_USED, PIECE_NB, SQUARE_NB>;
// PawnHistory is addressed by the pawn structure and a move's [piece][to]
using PawnHistory = Stats<int16_t, 8192, PAWN_HISTORY_SIZE, PIECE_NB, SQUARE_NB>;
// Correction histories record differences between the static evaluation of
// positions and their search score. They are used to improve the static evaluation
// used by some search heuristics.
// see https://www.chessprogramming.org/Static_Evaluation_Correction_History
enum CorrHistType {
Pawn, // By color and pawn structure
Major, // By color and positions of major pieces (Queen, Rook) and King
Minor, // By color and positions of minor pieces (Knight, Bishop) and King
NonPawn, // By color and non-pawn material positions
PieceTo, // By [piece][to] move
Continuation, // Combined history of move pairs
};
template<CorrHistType _>
struct CorrHistTypedef {
using type = Stats<int16_t, CORRECTION_HISTORY_LIMIT, COLOR_NB, CORRECTION_HISTORY_SIZE>;
};
template<>
struct CorrHistTypedef<PieceTo> {
using type = Stats<int16_t, CORRECTION_HISTORY_LIMIT, PIECE_NB, SQUARE_NB>;
};
template<>
struct CorrHistTypedef<Continuation> {
using type = Stats<CorrHistTypedef<PieceTo>::type, NOT_USED, PIECE_NB, SQUARE_NB>;
};
template<CorrHistType T>
using CorrectionHistory = typename CorrHistTypedef<T>::type;
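// Illustrative sketch, not part of the original header: every CorrHistType
// resolves through CorrHistTypedef to an ordinary Stats table, so correction
// histories are indexed and updated like any other history table; the
// Continuation variant merely nests the PieceTo table one level deeper.
// The indices below are placeholders rather than a real position lookup.
inline void correction_history_sketch() {
    CorrectionHistory<Pawn> pawnCorr;  // shape: [COLOR_NB][CORRECTION_HISTORY_SIZE]
    pawnCorr.fill(0);
    pawnCorr[WHITE][42] << 64;  // update magnitude bounded by CORRECTION_HISTORY_LIMIT
}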
} // namespace Stockfish
#endif // #ifndef HISTORY_H_INCLUDED

268
src/memory.cpp Normal file
View file

@ -0,0 +1,268 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "memory.h"
#include <cstdlib>
#if __has_include("features.h")
#include <features.h>
#endif
#if defined(__linux__) && !defined(__ANDROID__)
#include <sys/mman.h>
#endif
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
|| (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
|| defined(__e2k__)
#define POSIXALIGNEDALLOC
#include <stdlib.h>
#endif
#ifdef _WIN32
#if _WIN32_WINNT < 0x0601
#undef _WIN32_WINNT
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <ios> // std::hex, std::dec
#include <iostream> // std::cerr
#include <ostream> // std::endl
#include <windows.h>
// The needed Windows API functions may be missing from older Windows versions,
// so instead of calling them directly (forcing the linker to resolve the calls
// at link time), we try to load them at runtime. To do this we first need to
// define the corresponding function pointer types.
extern "C" {
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
using AdjustTokenPrivileges_t =
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
}
#endif
namespace Stockfish {
// Wrappers for systems where the C++17 implementation does not guarantee the
// availability of aligned_alloc(). Memory allocated with std_aligned_alloc()
// must be freed with std_aligned_free().
void* std_aligned_alloc(size_t alignment, size_t size) {
#if defined(_ISOC11_SOURCE)
return aligned_alloc(alignment, size);
#elif defined(POSIXALIGNEDALLOC)
void* mem = nullptr;
posix_memalign(&mem, alignment, size);
return mem;
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
return _mm_malloc(size, alignment);
#elif defined(_WIN32)
return _aligned_malloc(size, alignment);
#else
return std::aligned_alloc(alignment, size);
#endif
}
void std_aligned_free(void* ptr) {
#if defined(POSIXALIGNEDALLOC)
free(ptr);
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
_mm_free(ptr);
#elif defined(_WIN32)
_aligned_free(ptr);
#else
free(ptr);
#endif
}
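// Illustrative sketch, not part of the original file: whichever branch above is
// compiled in, the contract is the same, so a call site only needs to pair
// std_aligned_alloc() with std_aligned_free(). Alignment and size are arbitrary
// example values.
[[maybe_unused]] static void std_aligned_usage_sketch() {
    void* buf = std_aligned_alloc(64, 4096);  // 64-byte aligned, 4 KiB
    if (buf)
    {
        // ... use buf as scratch memory ...
        std_aligned_free(buf);  // always release through the matching wrapper
    }
}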
// aligned_large_pages_alloc() will return suitably aligned memory,
// using large pages if possible.
#if defined(_WIN32)
static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {
#if !defined(_WIN64)
return nullptr;
#else
HANDLE hProcessToken{};
LUID luid{};
void* mem = nullptr;
const size_t largePageSize = GetLargePageMinimum();
if (!largePageSize)
return nullptr;
// Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges
HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
if (!hAdvapi32)
hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
auto OpenProcessToken_f =
OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
if (!OpenProcessToken_f)
return nullptr;
auto LookupPrivilegeValueA_f =
LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
if (!LookupPrivilegeValueA_f)
return nullptr;
auto AdjustTokenPrivileges_f =
AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
if (!AdjustTokenPrivileges_f)
return nullptr;
// We need SeLockMemoryPrivilege, so try to enable it for the process
if (!OpenProcessToken_f( // OpenProcessToken()
GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
return nullptr;
if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
{
TOKEN_PRIVILEGES tp{};
TOKEN_PRIVILEGES prevTp{};
DWORD prevTpLen = 0;
tp.PrivilegeCount = 1;
tp.Privileges[0].Luid = luid;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
// succeeds, we still need to query GetLastError() to ensure that the privileges
// were actually obtained.
if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
&prevTpLen)
&& GetLastError() == ERROR_SUCCESS)
{
// Round up size to full pages and allocate
allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
PAGE_READWRITE);
// Privilege no longer needed, restore previous state
AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
}
}
CloseHandle(hProcessToken);
return mem;
#endif
}
void* aligned_large_pages_alloc(size_t allocSize) {
// Try to allocate large pages
void* mem = aligned_large_pages_alloc_windows(allocSize);
// Fall back to regular, page-aligned, allocation if necessary
if (!mem)
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
return mem;
}
#else
void* aligned_large_pages_alloc(size_t allocSize) {
#if defined(__linux__)
constexpr size_t alignment = 2 * 1024 * 1024; // 2MB page size assumed
#else
constexpr size_t alignment = 4096; // small page size assumed
#endif
// Round up to multiples of alignment
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
void* mem = std_aligned_alloc(alignment, size);
#if defined(MADV_HUGEPAGE)
madvise(mem, size, MADV_HUGEPAGE);
#endif
return mem;
}
#endif
bool has_large_pages() {
#if defined(_WIN32)
constexpr size_t page_size = 2 * 1024 * 1024; // 2MB page size assumed
void* mem = aligned_large_pages_alloc_windows(page_size);
if (mem == nullptr)
{
return false;
}
else
{
aligned_large_pages_free(mem);
return true;
}
#elif defined(__linux__)
#if defined(MADV_HUGEPAGE)
return true;
#else
return false;
#endif
#else
return false;
#endif
}
// aligned_large_pages_free() will free the memory previously allocated
// by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr.
#if defined(_WIN32)
void aligned_large_pages_free(void* mem) {
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
{
DWORD err = GetLastError();
std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
<< std::dec << std::endl;
exit(EXIT_FAILURE);
}
}
#else
void aligned_large_pages_free(void* mem) { std_aligned_free(mem); }
#endif
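// Illustrative sketch, not part of the original file: a typical call site
// requests one large block, optionally reports whether large pages are
// available at all, and releases the block with the matching free function.
// The size below is an arbitrary example value.
[[maybe_unused]] static void large_pages_usage_sketch() {
    constexpr size_t bytes = 64 * 1024 * 1024;  // hypothetical 64 MiB table
    void*            mem   = aligned_large_pages_alloc(bytes);
    if (mem && has_large_pages())
    {
        // ... the block may be backed by large pages on this system ...
    }
    aligned_large_pages_free(mem);  // nop if mem == nullptr
}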
} // namespace Stockfish

218
src/memory.h Normal file
View file

@ -0,0 +1,218 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef MEMORY_H_INCLUDED
#define MEMORY_H_INCLUDED
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <new>
#include <type_traits>
#include <utility>
#include "types.h"
namespace Stockfish {
void* std_aligned_alloc(size_t alignment, size_t size);
void std_aligned_free(void* ptr);
// Memory aligned by page size, min alignment: 4096 bytes
void* aligned_large_pages_alloc(size_t size);
void aligned_large_pages_free(void* mem);
bool has_large_pages();
// Frees the memory of a single object which was constructed with placement new,
// calling its destructor first if needed.
template<typename T, typename FREE_FUNC>
void memory_deleter(T* ptr, FREE_FUNC free_func) {
if (!ptr)
return;
// Explicitly needed to call the destructor
if constexpr (!std::is_trivially_destructible_v<T>)
ptr->~T();
free_func(ptr);
return;
}
// Frees the memory of an array of unknown bound which was constructed with
// placement new, destroying the elements in reverse order first.
template<typename T, typename FREE_FUNC>
void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
if (!ptr)
return;
// Step the pointer back to where the array size was stored
const size_t array_offset = std::max(sizeof(size_t), alignof(T));
char* raw_memory = reinterpret_cast<char*>(ptr) - array_offset;
if constexpr (!std::is_trivially_destructible_v<T>)
{
const size_t size = *reinterpret_cast<size_t*>(raw_memory);
// Explicitly call the destructor for each element in reverse order
for (size_t i = size; i-- > 0;)
ptr[i].~T();
}
free_func(raw_memory);
}
// Allocates memory for a single object and places it there with placement new
template<typename T, typename ALLOC_FUNC, typename... Args>
inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC alloc_func,
Args&&... args) {
void* raw_memory = alloc_func(sizeof(T));
ASSERT_ALIGNED(raw_memory, alignof(T));
return new (raw_memory) T(std::forward<Args>(args)...);
}
// Allocates memory for an array of unknown bound and places it there with placement new
template<typename T, typename ALLOC_FUNC>
inline std::enable_if_t<std::is_array_v<T>, std::remove_extent_t<T>*>
memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
using ElementType = std::remove_extent_t<T>;
const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));
// Save the array size in the memory location
char* raw_memory =
reinterpret_cast<char*>(alloc_func(array_offset + num * sizeof(ElementType)));
ASSERT_ALIGNED(raw_memory, alignof(T));
new (raw_memory) size_t(num);
for (size_t i = 0; i < num; ++i)
new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();
// Need to return the pointer at the start of the array so that
// the indexing in unique_ptr<T[]> works.
return reinterpret_cast<ElementType*>(raw_memory + array_offset);
}
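// Illustrative sketch, not part of the original header: the array overload
// stores the element count in front of the array (array_offset bytes before the
// returned pointer), which is why memory_deleter_array() steps the pointer back
// before reading the count and freeing. A matching round trip with the
// std_aligned_* wrappers, using example types and counts, looks like this:
inline void memory_allocator_roundtrip_sketch() {
    const auto alloc = [](size_t bytes) {
        return std_aligned_alloc(alignof(std::max_align_t), bytes);
    };
    int* arr = memory_allocator<int[]>(alloc, 16);     // value-initializes 16 ints
    arr[0]   = 42;
    memory_deleter_array<int>(arr, std_aligned_free);  // reads the stored count, then frees
}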
//
//
// aligned large page unique ptr
//
//
template<typename T>
struct LargePageDeleter {
void operator()(T* ptr) const { return memory_deleter<T>(ptr, aligned_large_pages_free); }
};
template<typename T>
struct LargePageArrayDeleter {
void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, aligned_large_pages_free); }
};
template<typename T>
using LargePagePtr =
std::conditional_t<std::is_array_v<T>,
std::unique_ptr<T, LargePageArrayDeleter<std::remove_extent_t<T>>>,
std::unique_ptr<T, LargePageDeleter<T>>>;
// make_unique_large_page for single objects
template<typename T, typename... Args>
std::enable_if_t<!std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(Args&&... args) {
static_assert(alignof(T) <= 4096,
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
T* obj = memory_allocator<T>(aligned_large_pages_alloc, std::forward<Args>(args)...);
return LargePagePtr<T>(obj);
}
// make_unique_large_page for arrays of unknown bound
template<typename T>
std::enable_if_t<std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(size_t num) {
using ElementType = std::remove_extent_t<T>;
static_assert(alignof(ElementType) <= 4096,
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
ElementType* memory = memory_allocator<T>(aligned_large_pages_alloc, num);
return LargePagePtr<T>(memory);
}
//
//
// aligned unique ptr
//
//
template<typename T>
struct AlignedDeleter {
void operator()(T* ptr) const { return memory_deleter<T>(ptr, std_aligned_free); }
};
template<typename T>
struct AlignedArrayDeleter {
void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, std_aligned_free); }
};
template<typename T>
using AlignedPtr =
std::conditional_t<std::is_array_v<T>,
std::unique_ptr<T, AlignedArrayDeleter<std::remove_extent_t<T>>>,
std::unique_ptr<T, AlignedDeleter<T>>>;
// make_unique_aligned for single objects
template<typename T, typename... Args>
std::enable_if_t<!std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(Args&&... args) {
const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); };
T* obj = memory_allocator<T>(func, std::forward<Args>(args)...);
return AlignedPtr<T>(obj);
}
// make_unique_aligned for arrays of unknown bound
template<typename T>
std::enable_if_t<std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(size_t num) {
using ElementType = std::remove_extent_t<T>;
const auto func = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); };
ElementType* memory = memory_allocator<T>(func, num);
return AlignedPtr<T>(memory);
}
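// Illustrative sketch, not part of the original header: the factory functions
// return unique_ptr types whose deleters already know how the memory was
// obtained, so call sites never pair allocation and release by hand. The
// element type and counts below are example values.
inline void make_unique_usage_sketch() {
    AlignedPtr<uint64_t[]>   table = make_unique_aligned<uint64_t[]>(1024);     // zero-initialized
    LargePagePtr<uint64_t[]> big   = make_unique_large_page<uint64_t[]>(1024);  // zero-initialized
    table[0] = big[0];  // unique_ptr<T[]> indexing works as usual
}   // both blocks are released through their custom deleters here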
// Get the first aligned element of an array.
// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
// where N is the number of elements in the array.
template<uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr) {
static_assert(alignof(T) < Alignment);
const uintptr_t ptrint = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));
return reinterpret_cast<T*>(
reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
}
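// Illustrative sketch, not part of the original header: to obtain, say, a
// 64-byte-aligned view into an ordinary buffer, over-allocate by the alignment
// and skip forward to the first aligned element. The sizes are example values.
inline void align_ptr_up_sketch() {
    constexpr uintptr_t Alignment = 64;
    char                buffer[1024 + Alignment];  // sizeof(T) * N + alignment bytes
    char*               aligned = align_ptr_up<Alignment>(buffer);
    // 'aligned' points inside 'buffer' at the first 64-byte boundary
    (void) aligned;
}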
} // namespace Stockfish
#endif // #ifndef MEMORY_H_INCLUDED

View file

@ -18,58 +18,21 @@
#include "misc.h" #include "misc.h"
#ifdef _WIN32
#if _WIN32_WINNT < 0x0601
#undef _WIN32_WINNT
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
// The needed Windows API for processor groups could be missed from old Windows
// versions, so instead of calling them directly (forcing the linker to resolve
// the calls at compile time), try to load them at runtime. To do this we need
// first to define the corresponding function pointers.
extern "C" {
using fun1_t = bool (*)(LOGICAL_PROCESSOR_RELATIONSHIP,
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX,
PDWORD);
using fun2_t = bool (*)(USHORT, PGROUP_AFFINITY);
using fun3_t = bool (*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
using fun4_t = bool (*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
using fun5_t = WORD (*)();
using fun6_t = bool (*)(HANDLE, DWORD, PHANDLE);
using fun7_t = bool (*)(LPCSTR, LPCSTR, PLUID);
using fun8_t = bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
}
#endif
#include <atomic> #include <atomic>
#include <cctype>
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <iterator>
#include <limits>
#include <mutex> #include <mutex>
#include <sstream> #include <sstream>
#include <string_view> #include <string_view>
#include "types.h" #include "types.h"
#if defined(__linux__) && !defined(__ANDROID__)
#include <sys/mman.h>
#endif
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
|| (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
|| defined(__e2k__)
#define POSIXALIGNEDALLOC
#include <stdlib.h>
#endif
namespace Stockfish { namespace Stockfish {
namespace { namespace {
@ -149,15 +112,17 @@ class Logger {
// Returns the full name of the current Stockfish version. // Returns the full name of the current Stockfish version.
// For local dev compiles we try to append the commit sha and commit date //
// from git if that fails only the local compilation date is set and "nogit" is specified: // For local dev compiles we try to append the commit SHA and
// Stockfish dev-YYYYMMDD-SHA // commit date from git. If that fails only the local compilation
// or // date is set and "nogit" is specified:
// Stockfish dev-YYYYMMDD-nogit // Stockfish dev-YYYYMMDD-SHA
// or
// Stockfish dev-YYYYMMDD-nogit
// //
// For releases (non-dev builds) we only include the version number: // For releases (non-dev builds) we only include the version number:
// Stockfish version // Stockfish version
std::string engine_info(bool to_uci) { std::string engine_version_info() {
std::stringstream ss; std::stringstream ss;
ss << "Stockfish " << version << std::setfill('0'); ss << "Stockfish " << version << std::setfill('0');
@ -168,8 +133,9 @@ std::string engine_info(bool to_uci) {
ss << stringify(GIT_DATE); ss << stringify(GIT_DATE);
#else #else
constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
std::string month, day, year;
std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" std::string month, day, year;
std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008"
date >> month >> day >> year; date >> month >> day >> year;
ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4) ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4)
@ -185,11 +151,14 @@ std::string engine_info(bool to_uci) {
#endif #endif
} }
ss << (to_uci ? "\nid author " : " by ") << "the Stockfish developers (see AUTHORS file)";
return ss.str(); return ss.str();
} }
std::string engine_info(bool to_uci) {
return engine_version_info() + (to_uci ? "\nid author " : " by ")
+ "the Stockfish developers (see AUTHORS file)";
}
// Returns a string trying to describe the compiler we use // Returns a string trying to describe the compiler we use
std::string compiler_info() { std::string compiler_info() {
@ -318,13 +287,21 @@ template<size_t N>
struct DebugInfo { struct DebugInfo {
std::atomic<int64_t> data[N] = {0}; std::atomic<int64_t> data[N] = {0};
constexpr inline std::atomic<int64_t>& operator[](int index) { return data[index]; } constexpr std::atomic<int64_t>& operator[](int index) { return data[index]; }
}; };
DebugInfo<2> hit[MaxDebugSlots]; struct DebugExtremes: public DebugInfo<3> {
DebugInfo<2> mean[MaxDebugSlots]; DebugExtremes() {
DebugInfo<3> stdev[MaxDebugSlots]; data[1] = std::numeric_limits<int64_t>::min();
DebugInfo<6> correl[MaxDebugSlots]; data[2] = std::numeric_limits<int64_t>::max();
}
};
DebugInfo<2> hit[MaxDebugSlots];
DebugInfo<2> mean[MaxDebugSlots];
DebugInfo<3> stdev[MaxDebugSlots];
DebugInfo<6> correl[MaxDebugSlots];
DebugExtremes extremes[MaxDebugSlots];
} // namespace } // namespace
@ -348,6 +325,18 @@ void dbg_stdev_of(int64_t value, int slot) {
stdev[slot][2] += value * value; stdev[slot][2] += value * value;
} }
void dbg_extremes_of(int64_t value, int slot) {
++extremes[slot][0];
int64_t current_max = extremes[slot][1].load();
while (current_max < value && !extremes[slot][1].compare_exchange_weak(current_max, value))
{}
int64_t current_min = extremes[slot][2].load();
while (current_min > value && !extremes[slot][2].compare_exchange_weak(current_min, value))
{}
}
void dbg_correl_of(int64_t value1, int64_t value2, int slot) { void dbg_correl_of(int64_t value1, int64_t value2, int slot) {
++correl[slot][0]; ++correl[slot][0];
@ -382,6 +371,13 @@ void dbg_print() {
std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl; std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl;
} }
for (int i = 0; i < MaxDebugSlots; ++i)
if ((n = extremes[i][0]))
{
std::cerr << "Extremity #" << i << ": Total " << n << " Min " << extremes[i][2]
<< " Max " << extremes[i][1] << std::endl;
}
for (int i = 0; i < MaxDebugSlots; ++i) for (int i = 0; i < MaxDebugSlots; ++i)
if ((n = correl[i][0])) if ((n = correl[i][0]))
{ {
@ -408,6 +404,8 @@ std::ostream& operator<<(std::ostream& os, SyncCout sc) {
return os; return os;
} }
void sync_cout_start() { std::cout << IO_LOCK; }
void sync_cout_end() { std::cout << IO_UNLOCK; }
// Trampoline helper to avoid moving Logger to misc.h // Trampoline helper to avoid moving Logger to misc.h
void start_logger(const std::string& fname) { Logger::start(fname); } void start_logger(const std::string& fname) { Logger::start(fname); }
@ -415,14 +413,14 @@ void start_logger(const std::string& fname) { Logger::start(fname); }
#ifdef NO_PREFETCH #ifdef NO_PREFETCH
void prefetch(void*) {} void prefetch(const void*) {}
#else #else
void prefetch(void* addr) { void prefetch(const void* addr) {
#if defined(_MSC_VER) #if defined(_MSC_VER)
_mm_prefetch((char*) addr, _MM_HINT_T0); _mm_prefetch((char const*) addr, _MM_HINT_T0);
#else #else
__builtin_prefetch(addr); __builtin_prefetch(addr);
#endif #endif
@ -430,291 +428,6 @@ void prefetch(void* addr) {
#endif #endif
// Wrapper for systems where the c++17 implementation
// does not guarantee the availability of aligned_alloc(). Memory allocated with
// std_aligned_alloc() must be freed with std_aligned_free().
void* std_aligned_alloc(size_t alignment, size_t size) {
#if defined(POSIXALIGNEDALLOC)
void* mem;
return posix_memalign(&mem, alignment, size) ? nullptr : mem;
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
return _mm_malloc(size, alignment);
#elif defined(_WIN32)
return _aligned_malloc(size, alignment);
#else
return std::aligned_alloc(alignment, size);
#endif
}
void std_aligned_free(void* ptr) {
#if defined(POSIXALIGNEDALLOC)
free(ptr);
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
_mm_free(ptr);
#elif defined(_WIN32)
_aligned_free(ptr);
#else
free(ptr);
#endif
}
// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
#if defined(_WIN32)
static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {
#if !defined(_WIN64)
return nullptr;
#else
HANDLE hProcessToken{};
LUID luid{};
void* mem = nullptr;
const size_t largePageSize = GetLargePageMinimum();
if (!largePageSize)
return nullptr;
// Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges
HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
if (!hAdvapi32)
hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
auto fun6 = fun6_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
if (!fun6)
return nullptr;
auto fun7 = fun7_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
if (!fun7)
return nullptr;
auto fun8 = fun8_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
if (!fun8)
return nullptr;
// We need SeLockMemoryPrivilege, so try to enable it for the process
if (!fun6( // OpenProcessToken()
GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
return nullptr;
if (fun7( // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid)
nullptr, "SeLockMemoryPrivilege", &luid))
{
TOKEN_PRIVILEGES tp{};
TOKEN_PRIVILEGES prevTp{};
DWORD prevTpLen = 0;
tp.PrivilegeCount = 1;
tp.Privileges[0].Luid = luid;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
// we still need to query GetLastError() to ensure that the privileges were actually obtained.
if (fun8( // AdjustTokenPrivileges()
hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen)
&& GetLastError() == ERROR_SUCCESS)
{
// Round up size to full pages and allocate
allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
PAGE_READWRITE);
// Privilege no longer needed, restore previous state
fun8( // AdjustTokenPrivileges ()
hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
}
}
CloseHandle(hProcessToken);
return mem;
#endif
}
void* aligned_large_pages_alloc(size_t allocSize) {
// Try to allocate large pages
void* mem = aligned_large_pages_alloc_windows(allocSize);
// Fall back to regular, page-aligned, allocation if necessary
if (!mem)
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
return mem;
}
#else
void* aligned_large_pages_alloc(size_t allocSize) {
#if defined(__linux__)
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
#else
constexpr size_t alignment = 4096; // assumed small page size
#endif
// Round up to multiples of alignment
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
void* mem = std_aligned_alloc(alignment, size);
#if defined(MADV_HUGEPAGE)
madvise(mem, size, MADV_HUGEPAGE);
#endif
return mem;
}
#endif
// aligned_large_pages_free() will free the previously allocated ttmem
#if defined(_WIN32)
void aligned_large_pages_free(void* mem) {
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
{
DWORD err = GetLastError();
std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
<< std::dec << std::endl;
exit(EXIT_FAILURE);
}
}
#else
void aligned_large_pages_free(void* mem) { std_aligned_free(mem); }
#endif
namespace WinProcGroup {
#ifndef _WIN32
void bind_this_thread(size_t) {}
#else
namespace {
// Retrieves logical processor information using Windows-specific
// API and returns the best node id for the thread with index idx. Original
// code from Texel by Peter Österlund.
int best_node(size_t idx) {
int threads = 0;
int nodes = 0;
int cores = 0;
DWORD returnLength = 0;
DWORD byteOffset = 0;
// Early exit if the needed API is not available at runtime
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
auto fun1 = (fun1_t) (void (*)()) GetProcAddress(k32, "GetLogicalProcessorInformationEx");
if (!fun1)
return -1;
// First call to GetLogicalProcessorInformationEx() to get returnLength.
// We expect the call to fail due to null buffer.
if (fun1(RelationAll, nullptr, &returnLength))
return -1;
// Once we know returnLength, allocate the buffer
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) malloc(returnLength);
// Second call to GetLogicalProcessorInformationEx(), now we expect to succeed
if (!fun1(RelationAll, buffer, &returnLength))
{
free(buffer);
return -1;
}
while (byteOffset < returnLength)
{
if (ptr->Relationship == RelationNumaNode)
nodes++;
else if (ptr->Relationship == RelationProcessorCore)
{
cores++;
threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
}
assert(ptr->Size);
byteOffset += ptr->Size;
ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) (((char*) ptr) + ptr->Size);
}
free(buffer);
std::vector<int> groups;
// Run as many threads as possible on the same node until the core limit is
// reached, then move on to filling the next node.
for (int n = 0; n < nodes; n++)
for (int i = 0; i < cores / nodes; i++)
groups.push_back(n);
// In case a core has more than one logical processor (we assume 2) and we
// still have threads to allocate, spread them evenly across available nodes.
for (int t = 0; t < threads - cores; t++)
groups.push_back(t % nodes);
// If we still have more threads than the total number of logical processors
// then return -1 and let the OS to decide what to do.
return idx < groups.size() ? groups[idx] : -1;
}
}
// Sets the group affinity of the current thread
void bind_this_thread(size_t idx) {
// Use only local variables to be thread-safe
int node = best_node(idx);
if (node == -1)
return;
// Early exit if the needed API are not available at runtime
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
auto fun2 = fun2_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"));
auto fun3 = fun3_t((void (*)()) GetProcAddress(k32, "SetThreadGroupAffinity"));
auto fun4 = fun4_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMask2"));
auto fun5 = fun5_t((void (*)()) GetProcAddress(k32, "GetMaximumProcessorGroupCount"));
if (!fun2 || !fun3)
return;
if (!fun4 || !fun5)
{
GROUP_AFFINITY affinity;
if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx
fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity
}
else
{
// If a numa node has more than one processor group, we assume they are
// sized equal and we spread threads evenly across the groups.
USHORT elements, returnedElements;
elements = fun5(); // GetMaximumProcessorGroupCount
GROUP_AFFINITY* affinity = (GROUP_AFFINITY*) malloc(elements * sizeof(GROUP_AFFINITY));
if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2
fun3(GetCurrentThread(), &affinity[idx % returnedElements],
nullptr); // SetThreadGroupAffinity
free(affinity);
}
}
#endif
} // namespace WinProcGroup
#ifdef _WIN32 #ifdef _WIN32
#include <direct.h> #include <direct.h>
#define GETCWD _getcwd #define GETCWD _getcwd
@ -723,6 +436,27 @@ void bind_this_thread(size_t idx) {
#define GETCWD getcwd #define GETCWD getcwd
#endif #endif
size_t str_to_size_t(const std::string& s) {
unsigned long long value = std::stoull(s);
if (value > std::numeric_limits<size_t>::max())
std::exit(EXIT_FAILURE);
return static_cast<size_t>(value);
}
std::optional<std::string> read_file_to_string(const std::string& path) {
std::ifstream f(path, std::ios_base::binary);
if (!f)
return std::nullopt;
return std::string(std::istreambuf_iterator<char>(f), std::istreambuf_iterator<char>());
}
void remove_whitespace(std::string& s) {
s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return std::isspace(c); }), s.end());
}
bool is_whitespace(std::string_view s) {
return std::all_of(s.begin(), s.end(), [](char c) { return std::isspace(c); });
}
std::string CommandLine::get_binary_directory(std::string argv0) { std::string CommandLine::get_binary_directory(std::string argv0) {
std::string pathSeparator; std::string pathSeparator;

View file

@ -24,9 +24,11 @@
#include <chrono> #include <chrono>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <cstdio>
#include <iosfwd> #include <iosfwd>
#include <memory> #include <optional>
#include <string> #include <string>
#include <string_view>
#include <vector> #include <vector>
#define stringify2(x) #x #define stringify2(x) #x
@ -34,49 +36,40 @@
namespace Stockfish { namespace Stockfish {
std::string engine_version_info();
std::string engine_info(bool to_uci = false); std::string engine_info(bool to_uci = false);
std::string compiler_info(); std::string compiler_info();
// Preloads the given address in L1/L2 cache. This is a non-blocking // Preloads the given address in L1/L2 cache. This is a non-blocking
// function that doesn't stall the CPU waiting for data to be loaded from memory, // function that doesn't stall the CPU waiting for data to be loaded from memory,
// which can be quite slow. // which can be quite slow.
void prefetch(void* addr); void prefetch(const void* addr);
void start_logger(const std::string& fname); void start_logger(const std::string& fname);
void* std_aligned_alloc(size_t alignment, size_t size);
void std_aligned_free(void* ptr);
// memory aligned by page size, min alignment: 4096 bytes
void* aligned_large_pages_alloc(size_t size);
// nop if mem == nullptr
void aligned_large_pages_free(void* mem);
// Deleter for automating release of memory area size_t str_to_size_t(const std::string& s);
template<typename T>
struct AlignedDeleter { #if defined(__linux__)
void operator()(T* ptr) const {
ptr->~T(); struct PipeDeleter {
std_aligned_free(ptr); void operator()(FILE* file) const {
if (file != nullptr)
{
pclose(file);
}
} }
}; };
template<typename T> #endif
struct LargePageDeleter {
void operator()(T* ptr) const {
ptr->~T();
aligned_large_pages_free(ptr);
}
};
template<typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
template<typename T>
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
// Reads the file as bytes.
// Returns std::nullopt if the file does not exist.
std::optional<std::string> read_file_to_string(const std::string& path);
void dbg_hit_on(bool cond, int slot = 0); void dbg_hit_on(bool cond, int slot = 0);
void dbg_mean_of(int64_t value, int slot = 0); void dbg_mean_of(int64_t value, int slot = 0);
void dbg_stdev_of(int64_t value, int slot = 0); void dbg_stdev_of(int64_t value, int slot = 0);
void dbg_extremes_of(int64_t value, int slot = 0);
void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0); void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0);
void dbg_print(); void dbg_print();
@ -88,6 +81,30 @@ inline TimePoint now() {
.count(); .count();
} }
inline std::vector<std::string_view> split(std::string_view s, std::string_view delimiter) {
std::vector<std::string_view> res;
if (s.empty())
return res;
size_t begin = 0;
for (;;)
{
const size_t end = s.find(delimiter, begin);
if (end == std::string::npos)
break;
res.emplace_back(s.substr(begin, end - begin));
begin = end + delimiter.size();
}
res.emplace_back(s.substr(begin));
return res;
}
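// Illustrative sketch, not part of the original header: split() returns
// non-owning string_views into its input, so the source string must outlive
// the result. The input below is an example value.
inline void split_usage_sketch() {
    const std::string             csv   = "a,b,,c";
    std::vector<std::string_view> parts = split(csv, ",");  // yields {"a", "b", "", "c"}
    (void) parts;
}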
void remove_whitespace(std::string& s);
bool is_whitespace(std::string_view s);
enum SyncCout { enum SyncCout {
IO_LOCK, IO_LOCK,
@ -98,19 +115,8 @@ std::ostream& operator<<(std::ostream&, SyncCout);
#define sync_cout std::cout << IO_LOCK #define sync_cout std::cout << IO_LOCK
#define sync_endl std::endl << IO_UNLOCK #define sync_endl std::endl << IO_UNLOCK
void sync_cout_start();
// Get the first aligned element of an array. void sync_cout_end();
// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
// where N is the number of elements in the array.
template<uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr) {
static_assert(alignof(T) < Alignment);
const uintptr_t ptrint = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));
return reinterpret_cast<T*>(
reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
}
// True if and only if the binary is compiled on a little-endian machine // True if and only if the binary is compiled on a little-endian machine
static inline const union { static inline const union {
@ -194,15 +200,6 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
#endif #endif
} }
// Under Windows it is not possible for a process to run on more than one
// logical processor group. This usually means being limited to using max 64
// cores. To overcome this, some special platform-specific API should be
// called to set group affinity for each thread. Original code from Texel by
// Peter Österlund.
namespace WinProcGroup {
void bind_this_thread(size_t idx);
}
struct CommandLine { struct CommandLine {
public: public:

View file

@ -75,17 +75,6 @@ ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard ta
b2 &= target; b2 &= target;
} }
if constexpr (Type == QUIET_CHECKS)
{
// To make a quiet check, you either make a direct check by pushing a pawn
// or push a blocker pawn that is not on the same file as the enemy king.
// Discovered check promotion has been already generated amongst the captures.
Square ksq = pos.square<KING>(Them);
Bitboard dcCandidatePawns = pos.blockers_for_king(Them) & ~file_bb(ksq);
b1 &= pawn_attacks_bb(Them, ksq) | shift<Up>(dcCandidatePawns);
b2 &= pawn_attacks_bb(Them, ksq) | shift<Up + Up>(dcCandidatePawns);
}
while (b1) while (b1)
{ {
Square to = pop_lsb(b1); Square to = pop_lsb(b1);
@ -158,7 +147,7 @@ ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard ta
} }
template<Color Us, PieceType Pt, bool Checks> template<Color Us, PieceType Pt>
ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) { ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) {
static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()"); static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()");
@ -170,10 +159,6 @@ ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target)
Square from = pop_lsb(bb); Square from = pop_lsb(bb);
Bitboard b = attacks_bb<Pt>(from, pos.pieces()) & target; Bitboard b = attacks_bb<Pt>(from, pos.pieces()) & target;
// To check, you either move freely a blocker or make a direct check.
if (Checks && (Pt == QUEEN || !(pos.blockers_for_king(~Us) & from)))
b &= pos.check_squares(Pt);
while (b) while (b)
*moveList++ = Move(from, pop_lsb(b)); *moveList++ = Move(from, pop_lsb(b));
} }
@ -187,9 +172,8 @@ ExtMove* generate_all(const Position& pos, ExtMove* moveList) {
static_assert(Type != LEGAL, "Unsupported type in generate_all()"); static_assert(Type != LEGAL, "Unsupported type in generate_all()");
constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations const Square ksq = pos.square<KING>(Us);
const Square ksq = pos.square<KING>(Us); Bitboard target;
Bitboard target;
// Skip generating non-king moves when in double check // Skip generating non-king moves when in double check
if (Type != EVASIONS || !more_than_one(pos.checkers())) if (Type != EVASIONS || !more_than_one(pos.checkers()))
@ -197,29 +181,24 @@ ExtMove* generate_all(const Position& pos, ExtMove* moveList) {
target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers()))
: Type == NON_EVASIONS ? ~pos.pieces(Us) : Type == NON_EVASIONS ? ~pos.pieces(Us)
: Type == CAPTURES ? pos.pieces(~Us) : Type == CAPTURES ? pos.pieces(~Us)
: ~pos.pieces(); // QUIETS || QUIET_CHECKS : ~pos.pieces(); // QUIETS
moveList = generate_pawn_moves<Us, Type>(pos, moveList, target); moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
moveList = generate_moves<Us, KNIGHT, Checks>(pos, moveList, target); moveList = generate_moves<Us, KNIGHT>(pos, moveList, target);
moveList = generate_moves<Us, BISHOP, Checks>(pos, moveList, target); moveList = generate_moves<Us, BISHOP>(pos, moveList, target);
moveList = generate_moves<Us, ROOK, Checks>(pos, moveList, target); moveList = generate_moves<Us, ROOK>(pos, moveList, target);
moveList = generate_moves<Us, QUEEN, Checks>(pos, moveList, target); moveList = generate_moves<Us, QUEEN>(pos, moveList, target);
} }
if (!Checks || pos.blockers_for_king(~Us) & ksq) Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);
{
Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);
if (Checks)
b &= ~attacks_bb<QUEEN>(pos.square<KING>(~Us));
while (b) while (b)
*moveList++ = Move(ksq, pop_lsb(b)); *moveList++ = Move(ksq, pop_lsb(b));
if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING)) if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING))
for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE}) for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE})
if (!pos.castling_impeded(cr) && pos.can_castle(cr)) if (!pos.castling_impeded(cr) && pos.can_castle(cr))
*moveList++ = Move::make<CASTLING>(ksq, pos.castling_rook_square(cr)); *moveList++ = Move::make<CASTLING>(ksq, pos.castling_rook_square(cr));
}
return moveList; return moveList;
} }
@ -231,8 +210,6 @@ ExtMove* generate_all(const Position& pos, ExtMove* moveList) {
// <QUIETS> Generates all pseudo-legal non-captures and underpromotions // <QUIETS> Generates all pseudo-legal non-captures and underpromotions
// <EVASIONS> Generates all pseudo-legal check evasions // <EVASIONS> Generates all pseudo-legal check evasions
// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures // <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
// <QUIET_CHECKS> Generates all pseudo-legal non-captures giving check,
// except castling and promotions
// //
// Returns a pointer to the end of the move list. // Returns a pointer to the end of the move list.
template<GenType Type> template<GenType Type>
@ -251,7 +228,6 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) {
template ExtMove* generate<CAPTURES>(const Position&, ExtMove*); template ExtMove* generate<CAPTURES>(const Position&, ExtMove*);
template ExtMove* generate<QUIETS>(const Position&, ExtMove*); template ExtMove* generate<QUIETS>(const Position&, ExtMove*);
template ExtMove* generate<EVASIONS>(const Position&, ExtMove*); template ExtMove* generate<EVASIONS>(const Position&, ExtMove*);
template ExtMove* generate<QUIET_CHECKS>(const Position&, ExtMove*);
template ExtMove* generate<NON_EVASIONS>(const Position&, ExtMove*); template ExtMove* generate<NON_EVASIONS>(const Position&, ExtMove*);

View file

@ -31,7 +31,6 @@ class Position;
enum GenType { enum GenType {
CAPTURES, CAPTURES,
QUIETS, QUIETS,
QUIET_CHECKS,
EVASIONS, EVASIONS,
NON_EVASIONS, NON_EVASIONS,
LEGAL LEGAL

View file

@ -18,10 +18,9 @@
#include "movepick.h" #include "movepick.h"
#include <algorithm> #include <array>
#include <cassert> #include <cassert>
#include <iterator> #include <limits>
#include <utility>
#include "bitboard.h" #include "bitboard.h"
#include "position.h" #include "position.h"
@ -35,7 +34,6 @@ enum Stages {
MAIN_TT, MAIN_TT,
CAPTURE_INIT, CAPTURE_INIT,
GOOD_CAPTURE, GOOD_CAPTURE,
REFUTATION,
QUIET_INIT, QUIET_INIT,
GOOD_QUIET, GOOD_QUIET,
BAD_CAPTURE, BAD_CAPTURE,
@ -54,13 +52,11 @@ enum Stages {
// generate qsearch moves // generate qsearch moves
QSEARCH_TT, QSEARCH_TT,
QCAPTURE_INIT, QCAPTURE_INIT,
QCAPTURE, QCAPTURE
QCHECK_INIT,
QCHECK
}; };
// Sort moves in descending order up to and including // Sort moves in descending order up to and including a given limit.
// a given limit. The order of moves smaller than the limit is left unspecified. // The order of moves smaller than the limit is left unspecified.
void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p) for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p)
@ -78,56 +74,38 @@ void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
// Constructors of the MovePicker class. As arguments, we pass information // Constructors of the MovePicker class. As arguments, we pass information
// to help it return the (presumably) good moves first, to decide which // to decide which class of moves to emit, to help sorting the (presumably)
// moves to return (in the quiescence search, for instance, we only want to // good moves first, and how important move ordering is at the current node.
// search captures, promotions, and some checks) and how important a good
// move ordering is at the current node.
// MovePicker constructor for the main search // MovePicker constructor for the main search and for the quiescence search
MovePicker::MovePicker(const Position& p, MovePicker::MovePicker(const Position& p,
Move ttm, Move ttm,
Depth d, Depth d,
const ButterflyHistory* mh, const ButterflyHistory* mh,
const LowPlyHistory* lph,
const CapturePieceToHistory* cph, const CapturePieceToHistory* cph,
const PieceToHistory** ch, const PieceToHistory** ch,
const PawnHistory* ph, const PawnHistory* ph,
Move cm, int pl) :
const Move* killers) :
pos(p), pos(p),
mainHistory(mh), mainHistory(mh),
lowPlyHistory(lph),
captureHistory(cph), captureHistory(cph),
continuationHistory(ch), continuationHistory(ch),
pawnHistory(ph), pawnHistory(ph),
ttMove(ttm), ttMove(ttm),
refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d),
depth(d) { ply(pl) {
assert(d > 0);
stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + !(ttm && pos.pseudo_legal(ttm)); if (pos.checkers())
stage = EVASION_TT + !(ttm && pos.pseudo_legal(ttm));
else
stage = (depth > 0 ? MAIN_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm));
} }
// Constructor for quiescence search // MovePicker constructor for ProbCut: we generate captures with Static Exchange
MovePicker::MovePicker(const Position& p, // Evaluation (SEE) greater than or equal to the given threshold.
Move ttm,
Depth d,
const ButterflyHistory* mh,
const CapturePieceToHistory* cph,
const PieceToHistory** ch,
const PawnHistory* ph) :
pos(p),
mainHistory(mh),
captureHistory(cph),
continuationHistory(ch),
pawnHistory(ph),
ttMove(ttm),
depth(d) {
assert(d <= 0);
stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm));
}
// Constructor for ProbCut: we generate captures with SEE greater
// than or equal to the given threshold.
MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) : MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) :
pos(p), pos(p),
captureHistory(cph), captureHistory(cph),
@ -139,9 +117,9 @@ MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceTo
+ !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold)); + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold));
} }
// Assigns a numerical value to each move in a list, used // Assigns a numerical value to each move in a list, used for sorting.
// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring // Captures are ordered by Most Valuable Victim (MVV), preferring captures
// captures with a good history. Quiets moves are ordered using the history tables. // with a good history. Quiets moves are ordered using the history tables.
template<GenType Type> template<GenType Type>
void MovePicker::score() { void MovePicker::score() {
@ -180,9 +158,9 @@ void MovePicker::score() {
// histories // histories
m.value = 2 * (*mainHistory)[pos.side_to_move()][m.from_to()]; m.value = 2 * (*mainHistory)[pos.side_to_move()][m.from_to()];
m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to]; m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to];
m.value += 2 * (*continuationHistory[0])[pc][to]; m.value += (*continuationHistory[0])[pc][to];
m.value += (*continuationHistory[1])[pc][to]; m.value += (*continuationHistory[1])[pc][to];
m.value += (*continuationHistory[2])[pc][to] / 4; m.value += (*continuationHistory[2])[pc][to];
m.value += (*continuationHistory[3])[pc][to]; m.value += (*continuationHistory[3])[pc][to];
m.value += (*continuationHistory[5])[pc][to]; m.value += (*continuationHistory[5])[pc][to];
@ -197,13 +175,12 @@ void MovePicker::score() {
: 0; : 0;
// malus for putting piece en prise // malus for putting piece en prise
m.value -= !(threatenedPieces & from) m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 49000
? (pt == QUEEN ? bool(to & threatenedByRook) * 48150 : pt == ROOK && bool(to & threatenedByMinor) ? 24335
+ bool(to & threatenedByMinor) * 10650 : 0);
: pt == ROOK ? bool(to & threatenedByMinor) * 24335
: pt != PAWN ? bool(to & threatenedByPawn) * 14950 if (ply < LOW_PLY_HISTORY_SIZE)
: 0) m.value += 8 * (*lowPlyHistory)[ply][m.from_to()] / (1 + 2 * ply);
: 0;
} }
else // Type == EVASIONS else // Type == EVASIONS
@ -219,27 +196,21 @@ void MovePicker::score() {
} }
// Returns the next move satisfying a predicate function. // Returns the next move satisfying a predicate function.
// It never returns the TT move. // This never returns the TT move, as it was emitted before.
template<MovePicker::PickType T, typename Pred> template<typename Pred>
Move MovePicker::select(Pred filter) { Move MovePicker::select(Pred filter) {
while (cur < endMoves) for (; cur < endMoves; ++cur)
{
if constexpr (T == Best)
std::swap(*cur, *std::max_element(cur, endMoves));
if (*cur != ttMove && filter()) if (*cur != ttMove && filter())
return *cur++; return *cur++;
cur++;
}
return Move::none(); return Move::none();
} }
// Most important method of the MovePicker class. It // This is the most important method of the MovePicker class. We emit one
// returns a new pseudo-legal move every time it is called until there are no more // new pseudo-legal move on every call until there are no more moves left,
// moves left, picking the move with the highest score from a list of generated moves. // picking the move with the highest score from a list of generated moves.
Move MovePicker::next_move(bool skipQuiets) { Move MovePicker::next_move() {
auto quiet_threshold = [](Depth d) { return -3560 * d; }; auto quiet_threshold = [](Depth d) { return -3560 * d; };
@ -266,29 +237,13 @@ top:
goto top; goto top;
case GOOD_CAPTURE : case GOOD_CAPTURE :
if (select<Next>([&]() { if (select([&]() {
// Move losing capture to endBadCaptures to be tried later // Move losing capture to endBadCaptures to be tried later
return pos.see_ge(*cur, -cur->value / 18) ? true return pos.see_ge(*cur, -cur->value / 18) ? true
: (*endBadCaptures++ = *cur, false); : (*endBadCaptures++ = *cur, false);
})) }))
return *(cur - 1); return *(cur - 1);
// Prepare the pointers to loop over the refutations array
cur = std::begin(refutations);
endMoves = std::end(refutations);
// If the countermove is the same as a killer, skip it
if (refutations[0] == refutations[2] || refutations[1] == refutations[2])
--endMoves;
++stage;
[[fallthrough]];
case REFUTATION :
if (select<Next>([&]() {
return *cur != Move::none() && !pos.capture_stage(*cur) && pos.pseudo_legal(*cur);
}))
return *(cur - 1);
++stage; ++stage;
[[fallthrough]]; [[fallthrough]];
@ -306,9 +261,7 @@ top:
[[fallthrough]]; [[fallthrough]];
case GOOD_QUIET : case GOOD_QUIET :
if (!skipQuiets && select<Next>([&]() { if (!skipQuiets && select([]() { return true; }))
return *cur != refutations[0] && *cur != refutations[1] && *cur != refutations[2];
}))
{ {
if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth)) if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth))
return *(cur - 1); return *(cur - 1);
@ -325,7 +278,7 @@ top:
[[fallthrough]]; [[fallthrough]];
case BAD_CAPTURE : case BAD_CAPTURE :
if (select<Next>([]() { return true; })) if (select([]() { return true; }))
return *(cur - 1); return *(cur - 1);
// Prepare the pointers to loop over the bad quiets // Prepare the pointers to loop over the bad quiets
@ -337,9 +290,7 @@ top:
case BAD_QUIET : case BAD_QUIET :
if (!skipQuiets) if (!skipQuiets)
return select<Next>([&]() { return select([]() { return true; });
return *cur != refutations[0] && *cur != refutations[1] && *cur != refutations[2];
});
return Move::none(); return Move::none();
@ -348,39 +299,22 @@ top:
endMoves = generate<EVASIONS>(pos, cur); endMoves = generate<EVASIONS>(pos, cur);
score<EVASIONS>(); score<EVASIONS>();
partial_insertion_sort(cur, endMoves, std::numeric_limits<int>::min());
++stage; ++stage;
[[fallthrough]]; [[fallthrough]];
case EVASION : case EVASION :
return select<Best>([]() { return true; }); case QCAPTURE :
return select([]() { return true; });
case PROBCUT : case PROBCUT :
return select<Next>([&]() { return pos.see_ge(*cur, threshold); }); return select([&]() { return pos.see_ge(*cur, threshold); });
case QCAPTURE :
if (select<Next>([]() { return true; }))
return *(cur - 1);
// If we did not find any move and we do not try checks, we have finished
if (depth != DEPTH_QS_CHECKS)
return Move::none();
++stage;
[[fallthrough]];
case QCHECK_INIT :
cur = moves;
endMoves = generate<QUIET_CHECKS>(pos, cur);
++stage;
[[fallthrough]];
case QCHECK :
return select<Next>([]() { return true; });
} }
assert(false); assert(false);
return Move::none(); // Silence warning return Move::none(); // Silence warning
} }
void MovePicker::skip_quiet_moves() { skipQuiets = true; }
} // namespace Stockfish } // namespace Stockfish

View file

@ -19,141 +19,22 @@
#ifndef MOVEPICK_H_INCLUDED #ifndef MOVEPICK_H_INCLUDED
#define MOVEPICK_H_INCLUDED #define MOVEPICK_H_INCLUDED
#include <algorithm> #include "history.h"
#include <array>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <type_traits> // IWYU pragma: keep
#include "movegen.h" #include "movegen.h"
#include "position.h"
#include "types.h" #include "types.h"
namespace Stockfish { namespace Stockfish {
constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 class Position;
constexpr int CORRECTION_HISTORY_SIZE = 16384; // has to be a power of 2
constexpr int CORRECTION_HISTORY_LIMIT = 1024;
static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0, // The MovePicker class is used to pick one pseudo-legal move at a time from the
"PAWN_HISTORY_SIZE has to be a power of 2"); // current position. The most important method is next_move(), which emits one
// new pseudo-legal move on every call, until there are no moves left, when
static_assert((CORRECTION_HISTORY_SIZE & (CORRECTION_HISTORY_SIZE - 1)) == 0, // Move::none() is returned. In order to improve the efficiency of the alpha-beta
"CORRECTION_HISTORY_SIZE has to be a power of 2"); // algorithm, MovePicker attempts to return the moves which are most likely to get
// a cut-off first.
enum PawnHistoryType {
Normal,
Correction
};
template<PawnHistoryType T = Normal>
inline int pawn_structure_index(const Position& pos) {
return pos.pawn_key() & ((T == Normal ? PAWN_HISTORY_SIZE : CORRECTION_HISTORY_SIZE) - 1);
}
// StatsEntry stores the stat table value. It is usually a number but could
// be a move or even a nested history. We use a class instead of a naked value
// to directly call history update operator<<() on the entry so to use stats
// tables at caller sites as simple multi-dim arrays.
template<typename T, int D>
class StatsEntry {
T entry;
public:
void operator=(const T& v) { entry = v; }
T* operator&() { return &entry; }
T* operator->() { return &entry; }
operator const T&() const { return entry; }
void operator<<(int bonus) {
static_assert(D <= std::numeric_limits<T>::max(), "D overflows T");
// Make sure that bonus is in range [-D, D]
int clampedBonus = std::clamp(bonus, -D, D);
entry += clampedBonus - entry * std::abs(clampedBonus) / D;
assert(std::abs(entry) <= D);
}
};
// Stats is a generic N-dimensional array used to store various statistics.
// The first template parameter T is the base type of the array, and the second
// template parameter D limits the range of updates in [-D, D] when we update
// values with the << operator, while the last parameters (Size and Sizes)
// encode the dimensions of the array.
template<typename T, int D, int Size, int... Sizes>
struct Stats: public std::array<Stats<T, D, Sizes...>, Size> {
using stats = Stats<T, D, Size, Sizes...>;
void fill(const T& v) {
// For standard-layout 'this' points to the first struct member
assert(std::is_standard_layout_v<stats>);
using entry = StatsEntry<T, D>;
entry* p = reinterpret_cast<entry*>(this);
std::fill(p, p + sizeof(*this) / sizeof(entry), v);
}
};
template<typename T, int D, int Size>
struct Stats<T, D, Size>: public std::array<StatsEntry<T, D>, Size> {};
// In stats table, D=0 means that the template parameter is not used
enum StatsParams {
NOT_USED = 0
};
enum StatsType {
NoCaptures,
Captures
};
// ButterflyHistory records how often quiet moves have been successful or unsuccessful
// during the current search, and is used for reduction and move ordering decisions.
// It uses 2 tables (one for each color) indexed by the move's from and to squares,
// see www.chessprogramming.org/Butterfly_Boards (~11 elo)
using ButterflyHistory = Stats<int16_t, 7183, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)>;
// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous
// move, see www.chessprogramming.org/Countermove_Heuristic
using CounterMoveHistory = Stats<Move, NOT_USED, PIECE_NB, SQUARE_NB>;
// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
using CapturePieceToHistory = Stats<int16_t, 10692, PIECE_NB, SQUARE_NB, PIECE_TYPE_NB>;
// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to]
using PieceToHistory = Stats<int16_t, 29952, PIECE_NB, SQUARE_NB>;
// ContinuationHistory is the combined history of a given pair of moves, usually
// the current one given a previous one. The nested history table is based on
// PieceToHistory instead of ButterflyBoards.
// (~63 elo)
using ContinuationHistory = Stats<PieceToHistory, NOT_USED, PIECE_NB, SQUARE_NB>;
// PawnHistory is addressed by the pawn structure and a move's [piece][to]
using PawnHistory = Stats<int16_t, 8192, PAWN_HISTORY_SIZE, PIECE_NB, SQUARE_NB>;
// CorrectionHistory is addressed by color and pawn structure
using CorrectionHistory =
Stats<int16_t, CORRECTION_HISTORY_LIMIT, COLOR_NB, CORRECTION_HISTORY_SIZE>;
// MovePicker class is used to pick one pseudo-legal move at a time from the
// current position. The most important method is next_move(), which returns a
// new pseudo-legal move each time it is called, until there are no moves left,
// when Move::none() is returned. In order to improve the efficiency of the
// alpha-beta algorithm, MovePicker attempts to return the moves which are most
// likely to get a cut-off first.
class MovePicker { class MovePicker {
enum PickType {
Next,
Best
};
public: public:
MovePicker(const MovePicker&) = delete; MovePicker(const MovePicker&) = delete;
MovePicker& operator=(const MovePicker&) = delete; MovePicker& operator=(const MovePicker&) = delete;
@ -161,23 +42,17 @@ class MovePicker {
Move, Move,
Depth, Depth,
const ButterflyHistory*, const ButterflyHistory*,
const LowPlyHistory*,
const CapturePieceToHistory*, const CapturePieceToHistory*,
const PieceToHistory**, const PieceToHistory**,
const PawnHistory*, const PawnHistory*,
Move, int);
const Move*);
MovePicker(const Position&,
Move,
Depth,
const ButterflyHistory*,
const CapturePieceToHistory*,
const PieceToHistory**,
const PawnHistory*);
MovePicker(const Position&, Move, int, const CapturePieceToHistory*); MovePicker(const Position&, Move, int, const CapturePieceToHistory*);
Move next_move(bool skipQuiets = false); Move next_move();
void skip_quiet_moves();
private: private:
template<PickType T, typename Pred> template<typename Pred>
Move select(Pred); Move select(Pred);
template<GenType> template<GenType>
void score(); void score();
@ -186,15 +61,18 @@ class MovePicker {
const Position& pos; const Position& pos;
const ButterflyHistory* mainHistory; const ButterflyHistory* mainHistory;
const LowPlyHistory* lowPlyHistory;
const CapturePieceToHistory* captureHistory; const CapturePieceToHistory* captureHistory;
const PieceToHistory** continuationHistory; const PieceToHistory** continuationHistory;
const PawnHistory* pawnHistory; const PawnHistory* pawnHistory;
Move ttMove; Move ttMove;
ExtMove refutations[3], *cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets; ExtMove * cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets;
int stage; int stage;
int threshold; int threshold;
Depth depth; Depth depth;
ExtMove moves[MAX_MOVES]; int ply;
bool skipQuiets = false;
ExtMove moves[MAX_MOVES];
}; };
} // namespace Stockfish } // namespace Stockfish
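The consumption pattern described in the MovePicker comment above can be sketched as follows (illustrative fragment; the MovePicker constructor arguments are elided and 'pos' is assumed to be the current Position):

// Illustrative fragment: keep asking the picker for moves until it is exhausted.
// next_move() yields pseudo-legal moves, so legality is checked before searching.
for (Move m = mp.next_move(); m != Move::none(); m = mp.next_move())
{
    if (!pos.legal(m))
        continue;
    // ... search m ...
}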


@ -39,25 +39,26 @@
namespace Stockfish::Eval::NNUE::Layers { namespace Stockfish::Eval::NNUE::Layers {
#if defined(USE_SSSE3) || defined(USE_NEON_DOTPROD)
#define ENABLE_SEQ_OPT
#endif
// Fallback implementation for older/other architectures. // Fallback implementation for older/other architectures.
// Requires the input to be padded to at least 16 values. // Requires the input to be padded to at least 16 values.
#if !defined(USE_SSSE3) #ifndef ENABLE_SEQ_OPT
template<IndexType InputDimensions, IndexType PaddedInputDimensions, IndexType OutputDimensions> template<IndexType InputDimensions, IndexType PaddedInputDimensions, IndexType OutputDimensions>
static void affine_transform_non_ssse3(std::int32_t* output, static void affine_transform_non_ssse3(std::int32_t* output,
const std::int8_t* weights, const std::int8_t* weights,
const std::int32_t* biases, const std::int32_t* biases,
const std::uint8_t* input) { const std::uint8_t* input) {
#if defined(USE_SSE2) || defined(USE_NEON_DOTPROD) || defined(USE_NEON) #if defined(USE_SSE2) || defined(USE_NEON)
#if defined(USE_SSE2) #if defined(USE_SSE2)
// At least a multiple of 16, with SSE2. // At least a multiple of 16, with SSE2.
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16; constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
const __m128i Zeros = _mm_setzero_si128(); const __m128i Zeros = _mm_setzero_si128();
const auto inputVector = reinterpret_cast<const __m128i*>(input); const auto inputVector = reinterpret_cast<const __m128i*>(input);
#elif defined(USE_NEON_DOTPROD)
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
const auto inputVector = reinterpret_cast<const int8x16_t*>(input);
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16; constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
const auto inputVector = reinterpret_cast<const int8x8_t*>(input); const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
@ -91,16 +92,8 @@ static void affine_transform_non_ssse3(std::int32_t* output,
sum = _mm_add_epi32(sum, sum_second_32); sum = _mm_add_epi32(sum, sum_second_32);
output[i] = _mm_cvtsi128_si32(sum); output[i] = _mm_cvtsi128_si32(sum);
#elif defined(USE_NEON_DOTPROD)
int32x4_t sum = {biases[i]};
const auto row = reinterpret_cast<const int8x16_t*>(&weights[offset]);
for (IndexType j = 0; j < NumChunks; ++j)
{
sum = vdotq_s32(sum, inputVector[j], row[j]);
}
output[i] = vaddvq_s32(sum);
#elif defined(USE_NEON) #elif defined(USE_NEON)
int32x4_t sum = {biases[i]}; int32x4_t sum = {biases[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]); const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
for (IndexType j = 0; j < NumChunks; ++j) for (IndexType j = 0; j < NumChunks; ++j)
@ -127,7 +120,8 @@ static void affine_transform_non_ssse3(std::int32_t* output,
} }
#endif #endif
} }
#endif
#endif // !ENABLE_SEQ_OPT
template<IndexType InDims, IndexType OutDims> template<IndexType InDims, IndexType OutDims>
class AffineTransform { class AffineTransform {
@ -162,7 +156,7 @@ class AffineTransform {
} }
static constexpr IndexType get_weight_index(IndexType i) { static constexpr IndexType get_weight_index(IndexType i) {
#if defined(USE_SSSE3) #ifdef ENABLE_SEQ_OPT
return get_weight_index_scrambled(i); return get_weight_index_scrambled(i);
#else #else
return i; return i;
@ -190,29 +184,28 @@ class AffineTransform {
// Forward propagation // Forward propagation
void propagate(const InputType* input, OutputType* output) const { void propagate(const InputType* input, OutputType* output) const {
#if defined(USE_SSSE3) #ifdef ENABLE_SEQ_OPT
if constexpr (OutputDimensions > 1) if constexpr (OutputDimensions > 1)
{ {
#if defined(USE_AVX512) #if defined(USE_AVX512)
using vec_t = __m512i; using vec_t = __m512i;
#define vec_setzero _mm512_setzero_si512
#define vec_set_32 _mm512_set1_epi32 #define vec_set_32 _mm512_set1_epi32
#define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32
#define vec_hadd Simd::m512_hadd
#elif defined(USE_AVX2) #elif defined(USE_AVX2)
using vec_t = __m256i; using vec_t = __m256i;
#define vec_setzero _mm256_setzero_si256
#define vec_set_32 _mm256_set1_epi32 #define vec_set_32 _mm256_set1_epi32
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
#define vec_hadd Simd::m256_hadd
#elif defined(USE_SSSE3) #elif defined(USE_SSSE3)
using vec_t = __m128i; using vec_t = __m128i;
#define vec_setzero _mm_setzero_si128
#define vec_set_32 _mm_set1_epi32 #define vec_set_32 _mm_set1_epi32
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
#define vec_hadd Simd::m128_hadd #elif defined(USE_NEON_DOTPROD)
using vec_t = int32x4_t;
#define vec_set_32 vdupq_n_s32
#define vec_add_dpbusd_32(acc, a, b) \
Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
vreinterpretq_s8_s32(b))
#endif #endif
static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType);
@ -242,28 +235,33 @@ class AffineTransform {
for (IndexType k = 0; k < NumRegs; ++k) for (IndexType k = 0; k < NumRegs; ++k)
outptr[k] = acc[k]; outptr[k] = acc[k];
#undef vec_setzero
#undef vec_set_32 #undef vec_set_32
#undef vec_add_dpbusd_32 #undef vec_add_dpbusd_32
#undef vec_hadd
} }
else if constexpr (OutputDimensions == 1) else if constexpr (OutputDimensions == 1)
{ {
// We cannot use AVX512 for the last layer because there are only 32 inputs // We cannot use AVX512 for the last layer because there are only 32 inputs
// and the buffer is not padded to 64 elements. // and the buffer is not padded to 64 elements.
#if defined(USE_AVX2) #if defined(USE_AVX2)
using vec_t = __m256i; using vec_t = __m256i;
#define vec_setzero _mm256_setzero_si256 #define vec_setzero() _mm256_setzero_si256()
#define vec_set_32 _mm256_set1_epi32 #define vec_set_32 _mm256_set1_epi32
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
#define vec_hadd Simd::m256_hadd #define vec_hadd Simd::m256_hadd
#elif defined(USE_SSSE3) #elif defined(USE_SSSE3)
using vec_t = __m128i; using vec_t = __m128i;
#define vec_setzero _mm_setzero_si128 #define vec_setzero() _mm_setzero_si128()
#define vec_set_32 _mm_set1_epi32 #define vec_set_32 _mm_set1_epi32
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
#define vec_hadd Simd::m128_hadd #define vec_hadd Simd::m128_hadd
#elif defined(USE_NEON_DOTPROD)
using vec_t = int32x4_t;
#define vec_setzero() vdupq_n_s32(0)
#define vec_set_32 vdupq_n_s32
#define vec_add_dpbusd_32(acc, a, b) \
Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
vreinterpretq_s8_s32(b))
#define vec_hadd Simd::neon_m128_hadd
#endif #endif
const auto inputVector = reinterpret_cast<const vec_t*>(input); const auto inputVector = reinterpret_cast<const vec_t*>(input);


@ -65,41 +65,37 @@ class ClippedReLU {
if constexpr (InputDimensions % SimdWidth == 0) if constexpr (InputDimensions % SimdWidth == 0)
{ {
constexpr IndexType NumChunks = InputDimensions / SimdWidth; constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const __m256i Zero = _mm256_setzero_si256();
const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
const auto in = reinterpret_cast<const __m256i*>(input); const auto in = reinterpret_cast<const __m256i*>(input);
const auto out = reinterpret_cast<__m256i*>(output); const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < NumChunks; ++i) for (IndexType i = 0; i < NumChunks; ++i)
{ {
const __m256i words0 = const __m256i words0 =
_mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 0]), _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 0]),
_mm256_load_si256(&in[i * 4 + 1])), _mm256_load_si256(&in[i * 4 + 1])),
WeightScaleBits); WeightScaleBits);
const __m256i words1 = const __m256i words1 =
_mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 2]), _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 2]),
_mm256_load_si256(&in[i * 4 + 3])), _mm256_load_si256(&in[i * 4 + 3])),
WeightScaleBits); WeightScaleBits);
_mm256_store_si256( _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(
&out[i], _mm256_permutevar8x32_epi32( _mm256_packs_epi16(words0, words1), Offsets));
_mm256_max_epi8(_mm256_packs_epi16(words0, words1), Zero), Offsets));
} }
} }
else else
{ {
constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
const __m128i Zero = _mm_setzero_si128();
const auto in = reinterpret_cast<const __m128i*>(input); const auto in = reinterpret_cast<const __m128i*>(input);
const auto out = reinterpret_cast<__m128i*>(output); const auto out = reinterpret_cast<__m128i*>(output);
for (IndexType i = 0; i < NumChunks; ++i) for (IndexType i = 0; i < NumChunks; ++i)
{ {
const __m128i words0 = _mm_srai_epi16( const __m128i words0 = _mm_srli_epi16(
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
WeightScaleBits); WeightScaleBits);
const __m128i words1 = _mm_srai_epi16( const __m128i words1 = _mm_srli_epi16(
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
WeightScaleBits); WeightScaleBits);
const __m128i packedbytes = _mm_packs_epi16(words0, words1); _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
_mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero));
} }
} }
constexpr IndexType Start = InputDimensions % SimdWidth == 0 constexpr IndexType Start = InputDimensions % SimdWidth == 0
@ -109,9 +105,7 @@ class ClippedReLU {
#elif defined(USE_SSE2) #elif defined(USE_SSE2)
constexpr IndexType NumChunks = InputDimensions / SimdWidth; constexpr IndexType NumChunks = InputDimensions / SimdWidth;
#ifdef USE_SSE41 #ifndef USE_SSE41
const __m128i Zero = _mm_setzero_si128();
#else
const __m128i k0x80s = _mm_set1_epi8(-128); const __m128i k0x80s = _mm_set1_epi8(-128);
#endif #endif
@ -119,6 +113,15 @@ class ClippedReLU {
const auto out = reinterpret_cast<__m128i*>(output); const auto out = reinterpret_cast<__m128i*>(output);
for (IndexType i = 0; i < NumChunks; ++i) for (IndexType i = 0; i < NumChunks; ++i)
{ {
#if defined(USE_SSE41)
const __m128i words0 = _mm_srli_epi16(
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
WeightScaleBits);
const __m128i words1 = _mm_srli_epi16(
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
WeightScaleBits);
_mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
#else
const __m128i words0 = _mm_srai_epi16( const __m128i words0 = _mm_srai_epi16(
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
WeightScaleBits); WeightScaleBits);
@ -126,15 +129,8 @@ class ClippedReLU {
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
WeightScaleBits); WeightScaleBits);
const __m128i packedbytes = _mm_packs_epi16(words0, words1); const __m128i packedbytes = _mm_packs_epi16(words0, words1);
_mm_store_si128(&out[i], _mm_store_si128(&out[i], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s));
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, Zero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif #endif
);
} }
constexpr IndexType Start = NumChunks * SimdWidth; constexpr IndexType Start = NumChunks * SimdWidth;


@ -43,39 +43,6 @@ namespace Stockfish::Simd {
return _mm512_reduce_add_epi32(sum) + bias; return _mm512_reduce_add_epi32(sum) + bias;
} }
/*
Parameters:
sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]]
sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]]
sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]]
sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]]
Returns:
ret = [
reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]),
reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]),
reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), reduce_add_epi32(zmm3.i128[2]),
reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3])
]
*/
[[maybe_unused]] static __m512i
m512_hadd128x16_interleave(__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) {
__m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1);
__m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1);
__m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3);
__m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3);
__m512i sum01 = _mm512_add_epi32(sum01a, sum01b);
__m512i sum23 = _mm512_add_epi32(sum23a, sum23b);
__m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23);
__m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23);
return _mm512_add_epi32(sum0123a, sum0123b);
}
[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) { [[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) {
#if defined(USE_VNNI) #if defined(USE_VNNI)


@ -18,17 +18,17 @@
#include "network.h" #include "network.h"
#include <cmath>
#include <cstdlib> #include <cstdlib>
#include <cstring>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <memory>
#include <optional> #include <optional>
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include "../evaluate.h" #include "../evaluate.h"
#include "../incbin/incbin.h" #include "../incbin/incbin.h"
#include "../memory.h"
#include "../misc.h" #include "../misc.h"
#include "../position.h" #include "../position.h"
#include "../types.h" #include "../types.h"
@ -85,23 +85,6 @@ namespace Stockfish::Eval::NNUE {
namespace Detail { namespace Detail {
// Initialize the evaluation function parameters
template<typename T>
void initialize(AlignedPtr<T>& pointer) {
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
std::memset(pointer.get(), 0, sizeof(T));
}
template<typename T>
void initialize(LargePagePtr<T>& pointer) {
static_assert(alignof(T) <= 4096,
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
std::memset(pointer.get(), 0, sizeof(T));
}
// Read evaluation function parameters // Read evaluation function parameters
template<typename T> template<typename T>
bool read_parameters(std::istream& stream, T& reference) { bool read_parameters(std::istream& stream, T& reference) {
@ -123,6 +106,42 @@ bool write_parameters(std::ostream& stream, const T& reference) {
} // namespace Detail } // namespace Detail
template<typename Arch, typename Transformer>
Network<Arch, Transformer>::Network(const Network<Arch, Transformer>& other) :
evalFile(other.evalFile),
embeddedType(other.embeddedType) {
if (other.featureTransformer)
featureTransformer = make_unique_large_page<Transformer>(*other.featureTransformer);
network = make_unique_aligned<Arch[]>(LayerStacks);
if (!other.network)
return;
for (std::size_t i = 0; i < LayerStacks; ++i)
network[i] = other.network[i];
}
template<typename Arch, typename Transformer>
Network<Arch, Transformer>&
Network<Arch, Transformer>::operator=(const Network<Arch, Transformer>& other) {
evalFile = other.evalFile;
embeddedType = other.embeddedType;
if (other.featureTransformer)
featureTransformer = make_unique_large_page<Transformer>(*other.featureTransformer);
network = make_unique_aligned<Arch[]>(LayerStacks);
if (!other.network)
return *this;
for (std::size_t i = 0; i < LayerStacks; ++i)
network[i] = other.network[i];
return *this;
}
template<typename Arch, typename Transformer> template<typename Arch, typename Transformer>
void Network<Arch, Transformer>::load(const std::string& rootDirectory, std::string evalfilePath) { void Network<Arch, Transformer>::load(const std::string& rootDirectory, std::string evalfilePath) {
@ -186,15 +205,13 @@ bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename
template<typename Arch, typename Transformer> template<typename Arch, typename Transformer>
Value Network<Arch, Transformer>::evaluate(const Position& pos, NetworkOutput
AccumulatorCaches::Cache<FTDimensions>* cache, Network<Arch, Transformer>::evaluate(const Position& pos,
bool adjusted, AccumulatorCaches::Cache<FTDimensions>* cache) const {
int* complexity) const {
// We manually align the arrays on the stack because with gcc < 9.3 // We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly. // overaligning stack variables with alignas() doesn't work correctly.
constexpr uint64_t alignment = CacheLineSize; constexpr uint64_t alignment = CacheLineSize;
constexpr int delta = 24;
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType TransformedFeatureType
@ -211,50 +228,50 @@ Value Network<Arch, Transformer>::evaluate(const Position&
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4; const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket); const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket);
const auto positional = network[bucket]->propagate(transformedFeatures); const auto positional = network[bucket].propagate(transformedFeatures);
return {static_cast<Value>(psqt / OutputScale), static_cast<Value>(positional / OutputScale)};
if (complexity)
*complexity = std::abs(psqt - positional) / OutputScale;
// Give more value to positional evaluation when adjusted flag is set
if (adjusted)
return static_cast<Value>(((1024 - delta) * psqt + (1024 + delta) * positional)
/ (1024 * OutputScale));
else
return static_cast<Value>((psqt + positional) / OutputScale);
} }
template<typename Arch, typename Transformer> template<typename Arch, typename Transformer>
void Network<Arch, Transformer>::verify(std::string evalfilePath) const { void Network<Arch, Transformer>::verify(std::string evalfilePath,
const std::function<void(std::string_view)>& f) const {
if (evalfilePath.empty()) if (evalfilePath.empty())
evalfilePath = evalFile.defaultName; evalfilePath = evalFile.defaultName;
if (evalFile.current != evalfilePath) if (evalFile.current != evalfilePath)
{ {
std::string msg1 = if (f)
"Network evaluation parameters compatible with the engine must be available."; {
std::string msg2 = "The network file " + evalfilePath + " was not loaded successfully."; std::string msg1 =
std::string msg3 = "The UCI option EvalFile might need to specify the full path, " "Network evaluation parameters compatible with the engine must be available.";
"including the directory name, to the network file."; std::string msg2 = "The network file " + evalfilePath + " was not loaded successfully.";
std::string msg4 = "The default net can be downloaded from: " std::string msg3 = "The UCI option EvalFile might need to specify the full path, "
"https://tests.stockfishchess.org/api/nn/" "including the directory name, to the network file.";
+ evalFile.defaultName; std::string msg4 = "The default net can be downloaded from: "
std::string msg5 = "The engine will be terminated now."; "https://tests.stockfishchess.org/api/nn/"
+ evalFile.defaultName;
std::string msg5 = "The engine will be terminated now.";
std::string msg = "ERROR: " + msg1 + '\n' + "ERROR: " + msg2 + '\n' + "ERROR: " + msg3
+ '\n' + "ERROR: " + msg4 + '\n' + "ERROR: " + msg5 + '\n';
f(msg);
}
sync_cout << "info string ERROR: " << msg1 << sync_endl;
sync_cout << "info string ERROR: " << msg2 << sync_endl;
sync_cout << "info string ERROR: " << msg3 << sync_endl;
sync_cout << "info string ERROR: " << msg4 << sync_endl;
sync_cout << "info string ERROR: " << msg5 << sync_endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
size_t size = sizeof(*featureTransformer) + sizeof(*network) * LayerStacks; if (f)
sync_cout << "info string NNUE evaluation using " << evalfilePath << " (" {
<< size / (1024 * 1024) << "MiB, (" << featureTransformer->InputDimensions << ", " size_t size = sizeof(*featureTransformer) + sizeof(Arch) * LayerStacks;
<< network[0]->TransformedFeatureDimensions << ", " << network[0]->FC_0_OUTPUTS f("info string NNUE evaluation using " + evalfilePath + " ("
<< ", " << network[0]->FC_1_OUTPUTS << ", 1))" << sync_endl; + std::to_string(size / (1024 * 1024)) + "MiB, ("
+ std::to_string(featureTransformer->InputDimensions) + ", "
+ std::to_string(network[0].TransformedFeatureDimensions) + ", "
+ std::to_string(network[0].FC_0_OUTPUTS) + ", " + std::to_string(network[0].FC_1_OUTPUTS)
+ ", 1))");
}
} }
@ -291,7 +308,7 @@ Network<Arch, Transformer>::trace_evaluate(const Position&
{ {
const auto materialist = const auto materialist =
featureTransformer->transform(pos, cache, transformedFeatures, bucket); featureTransformer->transform(pos, cache, transformedFeatures, bucket);
const auto positional = network[bucket]->propagate(transformedFeatures); const auto positional = network[bucket].propagate(transformedFeatures);
t.psqt[bucket] = static_cast<Value>(materialist / OutputScale); t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
t.positional[bucket] = static_cast<Value>(positional / OutputScale); t.positional[bucket] = static_cast<Value>(positional / OutputScale);
@ -344,9 +361,8 @@ void Network<Arch, Transformer>::load_internal() {
template<typename Arch, typename Transformer> template<typename Arch, typename Transformer>
void Network<Arch, Transformer>::initialize() { void Network<Arch, Transformer>::initialize() {
Detail::initialize(featureTransformer); featureTransformer = make_unique_large_page<Transformer>();
for (std::size_t i = 0; i < LayerStacks; ++i) network = make_unique_aligned<Arch[]>(LayerStacks);
Detail::initialize(network[i]);
} }
@ -413,7 +429,7 @@ bool Network<Arch, Transformer>::read_parameters(std::istream& stream,
return false; return false;
for (std::size_t i = 0; i < LayerStacks; ++i) for (std::size_t i = 0; i < LayerStacks; ++i)
{ {
if (!Detail::read_parameters(stream, *(network[i]))) if (!Detail::read_parameters(stream, network[i]))
return false; return false;
} }
return stream && stream.peek() == std::ios::traits_type::eof(); return stream && stream.peek() == std::ios::traits_type::eof();
@ -429,7 +445,7 @@ bool Network<Arch, Transformer>::write_parameters(std::ostream& stream,
return false; return false;
for (std::size_t i = 0; i < LayerStacks; ++i) for (std::size_t i = 0; i < LayerStacks; ++i)
{ {
if (!Detail::write_parameters(stream, *(network[i]))) if (!Detail::write_parameters(stream, network[i]))
return false; return false;
} }
return bool(stream); return bool(stream);


@ -20,18 +20,21 @@
#define NETWORK_H_INCLUDED #define NETWORK_H_INCLUDED
#include <cstdint> #include <cstdint>
#include <functional>
#include <iostream> #include <iostream>
#include <optional> #include <optional>
#include <string> #include <string>
#include <string_view>
#include <tuple>
#include <utility> #include <utility>
#include "../misc.h" #include "../memory.h"
#include "../position.h" #include "../position.h"
#include "../types.h" #include "../types.h"
#include "nnue_accumulator.h"
#include "nnue_architecture.h" #include "nnue_architecture.h"
#include "nnue_feature_transformer.h" #include "nnue_feature_transformer.h"
#include "nnue_misc.h" #include "nnue_misc.h"
#include "nnue_accumulator.h"
namespace Stockfish::Eval::NNUE { namespace Stockfish::Eval::NNUE {
@ -40,6 +43,7 @@ enum class EmbeddedNNUEType {
SMALL, SMALL,
}; };
using NetworkOutput = std::tuple<Value, Value>;
template<typename Arch, typename Transformer> template<typename Arch, typename Transformer>
class Network { class Network {
@ -50,19 +54,23 @@ class Network {
evalFile(file), evalFile(file),
embeddedType(type) {} embeddedType(type) {}
Network(const Network& other);
Network(Network&& other) = default;
Network& operator=(const Network& other);
Network& operator=(Network&& other) = default;
void load(const std::string& rootDirectory, std::string evalfilePath); void load(const std::string& rootDirectory, std::string evalfilePath);
bool save(const std::optional<std::string>& filename) const; bool save(const std::optional<std::string>& filename) const;
Value evaluate(const Position& pos, NetworkOutput evaluate(const Position& pos,
AccumulatorCaches::Cache<FTDimensions>* cache, AccumulatorCaches::Cache<FTDimensions>* cache) const;
bool adjusted = false,
int* complexity = nullptr) const;
void hint_common_access(const Position& pos, void hint_common_access(const Position& pos,
AccumulatorCaches::Cache<FTDimensions>* cache) const; AccumulatorCaches::Cache<FTDimensions>* cache) const;
void verify(std::string evalfilePath) const; void verify(std::string evalfilePath, const std::function<void(std::string_view)>&) const;
NnueEvalTrace trace_evaluate(const Position& pos, NnueEvalTrace trace_evaluate(const Position& pos,
AccumulatorCaches::Cache<FTDimensions>* cache) const; AccumulatorCaches::Cache<FTDimensions>* cache) const;
@ -85,7 +93,7 @@ class Network {
LargePagePtr<Transformer> featureTransformer; LargePagePtr<Transformer> featureTransformer;
// Evaluation function // Evaluation function
AlignedPtr<Arch> network[LayerStacks]; AlignedPtr<Arch[]> network;
EvalFile evalFile; EvalFile evalFile;
EmbeddedNNUEType embeddedType; EmbeddedNNUEType embeddedType;


@ -80,11 +80,6 @@ struct AccumulatorCaches {
entry.clear(network.featureTransformer->biases); entry.clear(network.featureTransformer->biases);
} }
void clear(const BiasType* biases) {
for (auto& entry : entries)
entry.clear(biases);
}
std::array<Entry, COLOR_NB>& operator[](Square sq) { return entries[sq]; } std::array<Entry, COLOR_NB>& operator[](Square sq) { return entries[sq]; }
std::array<std::array<Entry, COLOR_NB>, SQUARE_NB> entries; std::array<std::array<Entry, COLOR_NB>, SQUARE_NB> entries;


@ -55,14 +55,14 @@ using psqt_vec_t = __m256i;
#define vec_store(a, b) _mm512_store_si512(a, b) #define vec_store(a, b) _mm512_store_si512(a, b)
#define vec_add_16(a, b) _mm512_add_epi16(a, b) #define vec_add_16(a, b) _mm512_add_epi16(a, b)
#define vec_sub_16(a, b) _mm512_sub_epi16(a, b) #define vec_sub_16(a, b) _mm512_sub_epi16(a, b)
#define vec_mul_16(a, b) _mm512_mullo_epi16(a, b) #define vec_mulhi_16(a, b) _mm512_mulhi_epi16(a, b)
#define vec_zero() _mm512_setzero_epi32() #define vec_zero() _mm512_setzero_epi32()
#define vec_set_16(a) _mm512_set1_epi16(a) #define vec_set_16(a) _mm512_set1_epi16(a)
#define vec_max_16(a, b) _mm512_max_epi16(a, b) #define vec_max_16(a, b) _mm512_max_epi16(a, b)
#define vec_min_16(a, b) _mm512_min_epi16(a, b) #define vec_min_16(a, b) _mm512_min_epi16(a, b)
#define vec_slli_16(a, b) _mm512_slli_epi16(a, b)
// Inverse permuted at load time // Inverse permuted at load time
#define vec_msb_pack_16(a, b) \ #define vec_packus_16(a, b) _mm512_packus_epi16(a, b)
_mm512_packs_epi16(_mm512_srli_epi16(a, 7), _mm512_srli_epi16(b, 7))
#define vec_load_psqt(a) _mm256_load_si256(a) #define vec_load_psqt(a) _mm256_load_si256(a)
#define vec_store_psqt(a, b) _mm256_store_si256(a, b) #define vec_store_psqt(a, b) _mm256_store_si256(a, b)
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
@ -78,14 +78,14 @@ using psqt_vec_t = __m256i;
#define vec_store(a, b) _mm256_store_si256(a, b) #define vec_store(a, b) _mm256_store_si256(a, b)
#define vec_add_16(a, b) _mm256_add_epi16(a, b) #define vec_add_16(a, b) _mm256_add_epi16(a, b)
#define vec_sub_16(a, b) _mm256_sub_epi16(a, b) #define vec_sub_16(a, b) _mm256_sub_epi16(a, b)
#define vec_mul_16(a, b) _mm256_mullo_epi16(a, b) #define vec_mulhi_16(a, b) _mm256_mulhi_epi16(a, b)
#define vec_zero() _mm256_setzero_si256() #define vec_zero() _mm256_setzero_si256()
#define vec_set_16(a) _mm256_set1_epi16(a) #define vec_set_16(a) _mm256_set1_epi16(a)
#define vec_max_16(a, b) _mm256_max_epi16(a, b) #define vec_max_16(a, b) _mm256_max_epi16(a, b)
#define vec_min_16(a, b) _mm256_min_epi16(a, b) #define vec_min_16(a, b) _mm256_min_epi16(a, b)
#define vec_slli_16(a, b) _mm256_slli_epi16(a, b)
// Inverse permuted at load time // Inverse permuted at load time
#define vec_msb_pack_16(a, b) \ #define vec_packus_16(a, b) _mm256_packus_epi16(a, b)
_mm256_packs_epi16(_mm256_srli_epi16(a, 7), _mm256_srli_epi16(b, 7))
#define vec_load_psqt(a) _mm256_load_si256(a) #define vec_load_psqt(a) _mm256_load_si256(a)
#define vec_store_psqt(a, b) _mm256_store_si256(a, b) #define vec_store_psqt(a, b) _mm256_store_si256(a, b)
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
@ -101,12 +101,13 @@ using psqt_vec_t = __m128i;
#define vec_store(a, b) *(a) = (b) #define vec_store(a, b) *(a) = (b)
#define vec_add_16(a, b) _mm_add_epi16(a, b) #define vec_add_16(a, b) _mm_add_epi16(a, b)
#define vec_sub_16(a, b) _mm_sub_epi16(a, b) #define vec_sub_16(a, b) _mm_sub_epi16(a, b)
#define vec_mul_16(a, b) _mm_mullo_epi16(a, b) #define vec_mulhi_16(a, b) _mm_mulhi_epi16(a, b)
#define vec_zero() _mm_setzero_si128() #define vec_zero() _mm_setzero_si128()
#define vec_set_16(a) _mm_set1_epi16(a) #define vec_set_16(a) _mm_set1_epi16(a)
#define vec_max_16(a, b) _mm_max_epi16(a, b) #define vec_max_16(a, b) _mm_max_epi16(a, b)
#define vec_min_16(a, b) _mm_min_epi16(a, b) #define vec_min_16(a, b) _mm_min_epi16(a, b)
#define vec_msb_pack_16(a, b) _mm_packs_epi16(_mm_srli_epi16(a, 7), _mm_srli_epi16(b, 7)) #define vec_slli_16(a, b) _mm_slli_epi16(a, b)
#define vec_packus_16(a, b) _mm_packus_epi16(a, b)
#define vec_load_psqt(a) (*(a)) #define vec_load_psqt(a) (*(a))
#define vec_store_psqt(a, b) *(a) = (b) #define vec_store_psqt(a, b) *(a) = (b)
#define vec_add_psqt_32(a, b) _mm_add_epi32(a, b) #define vec_add_psqt_32(a, b) _mm_add_epi32(a, b)
@ -122,18 +123,14 @@ using psqt_vec_t = int32x4_t;
#define vec_store(a, b) *(a) = (b) #define vec_store(a, b) *(a) = (b)
#define vec_add_16(a, b) vaddq_s16(a, b) #define vec_add_16(a, b) vaddq_s16(a, b)
#define vec_sub_16(a, b) vsubq_s16(a, b) #define vec_sub_16(a, b) vsubq_s16(a, b)
#define vec_mul_16(a, b) vmulq_s16(a, b) #define vec_mulhi_16(a, b) vqdmulhq_s16(a, b)
#define vec_zero() \ #define vec_zero() \
vec_t { 0 } vec_t { 0 }
#define vec_set_16(a) vdupq_n_s16(a) #define vec_set_16(a) vdupq_n_s16(a)
#define vec_max_16(a, b) vmaxq_s16(a, b) #define vec_max_16(a, b) vmaxq_s16(a, b)
#define vec_min_16(a, b) vminq_s16(a, b) #define vec_min_16(a, b) vminq_s16(a, b)
inline vec_t vec_msb_pack_16(vec_t a, vec_t b) { #define vec_slli_16(a, b) vshlq_s16(a, vec_set_16(b))
const int8x8_t shifta = vshrn_n_s16(a, 7); #define vec_packus_16(a, b) reinterpret_cast<vec_t>(vcombine_u8(vqmovun_s16(a), vqmovun_s16(b)))
const int8x8_t shiftb = vshrn_n_s16(b, 7);
const int8x16_t compacted = vcombine_s8(shifta, shiftb);
return *reinterpret_cast<const vec_t*>(&compacted);
}
#define vec_load_psqt(a) (*(a)) #define vec_load_psqt(a) (*(a))
#define vec_store_psqt(a, b) *(a) = (b) #define vec_store_psqt(a, b) *(a) = (b)
#define vec_add_psqt_32(a, b) vaddq_s32(a, b) #define vec_add_psqt_32(a, b) vaddq_s32(a, b)
@ -281,6 +278,19 @@ class FeatureTransformer {
#endif #endif
} }
inline void scale_weights(bool read) const {
for (IndexType j = 0; j < InputDimensions; ++j)
{
WeightType* w = const_cast<WeightType*>(&weights[j * HalfDimensions]);
for (IndexType i = 0; i < HalfDimensions; ++i)
w[i] = read ? w[i] * 2 : w[i] / 2;
}
BiasType* b = const_cast<BiasType*>(biases);
for (IndexType i = 0; i < HalfDimensions; ++i)
b[i] = read ? b[i] * 2 : b[i] / 2;
}
// Read network parameters // Read network parameters
bool read_parameters(std::istream& stream) { bool read_parameters(std::istream& stream) {
@ -289,6 +299,7 @@ class FeatureTransformer {
read_leb_128<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions); read_leb_128<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
permute_weights(inverse_order_packs); permute_weights(inverse_order_packs);
scale_weights(true);
return !stream.fail(); return !stream.fail();
} }
@ -296,12 +307,14 @@ class FeatureTransformer {
bool write_parameters(std::ostream& stream) const { bool write_parameters(std::ostream& stream) const {
permute_weights(order_packs); permute_weights(order_packs);
scale_weights(false);
write_leb_128<BiasType>(stream, biases, HalfDimensions); write_leb_128<BiasType>(stream, biases, HalfDimensions);
write_leb_128<WeightType>(stream, weights, HalfDimensions * InputDimensions); write_leb_128<WeightType>(stream, weights, HalfDimensions * InputDimensions);
write_leb_128<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions); write_leb_128<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
permute_weights(inverse_order_packs); permute_weights(inverse_order_packs);
scale_weights(true);
return !stream.fail(); return !stream.fail();
} }
@ -332,24 +345,86 @@ class FeatureTransformer {
constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize;
const vec_t Zero = vec_zero(); const vec_t Zero = vec_zero();
const vec_t One = vec_set_16(127); const vec_t One = vec_set_16(127 * 2);
const vec_t* in0 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][0])); const vec_t* in0 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][0]));
const vec_t* in1 = const vec_t* in1 =
reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][HalfDimensions / 2])); reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][HalfDimensions / 2]));
vec_t* out = reinterpret_cast<vec_t*>(output + offset); vec_t* out = reinterpret_cast<vec_t*>(output + offset);
// Per the NNUE architecture, here we want to multiply pairs of
// clipped elements and divide the product by 128. To do this,
// we can naively perform min/max operations to clip each of the
// four int16 vectors, mullo the pairs together, then pack them
// into one int8 vector. However, there exists a faster way.
// The idea here is to use the implicit clipping from packus to
// save us two vec_max_16 instructions. This clipping works due
// to the fact that any int16 integer below zero will be zeroed
// on packus.
// Consider the case where the second element is negative.
// If we do standard clipping, that element will be zero, which
// means our pairwise product is zero. If we perform packus and
// remove the lower-side clip for the second element, then our
// product before packus will be negative, and is zeroed on pack.
// The two operation produce equivalent results, but the second
// one (using packus) saves one max operation per pair.
// But here we run into a problem: mullo does not preserve the
// sign of the multiplication. We can get around this by doing
// mulhi, which keeps the sign. But that requires an additional
// tweak.
// mulhi cuts off the last 16 bits of the resulting product,
// which is the same as performing a rightward shift of 16 bits.
// We can use this to our advantage. Recall that we want to
// divide the final product by 128, which is equivalent to a
// 7-bit right shift. Intuitively, if we shift the clipped
// value left by 9, and perform mulhi, which shifts the product
// right by 16 bits, then we will net a right shift of 7 bits.
// However, this won't work as intended. Since we clip the
// values to a maximum of 127, shifting them left by 9 bits
// can reach the sign bit, so some positive values would be
// interpreted as negative after the shift.
// There is a way, however, to get around this limitation. When
// loading the network, scale accumulator weights and biases by
// 2. To get the same pairwise multiplication result as before,
// we need to divide the product by 128 * 2 * 2 = 512, which
// amounts to a right shift of 9 bits. So now we only have to
// shift left by 7 bits, perform mulhi (shifts right by 16 bits)
// and net a 9 bit right shift. Since we scaled everything by
// two, the values are clipped at 127 * 2 = 254, which occupies
// 8 bits. Shifting them left by 7 bits no longer reaches the
// sign bit, so we are safe.
// Note that on NEON processors, we shift left by 6 instead
// because the instruction "vqdmulhq_s16" also doubles the
// return value after the multiplication, adding an extra shift
// to the left by 1, so we compensate by shifting less before
// the multiplication.
constexpr int shift =
#if defined(USE_SSE2)
7;
#else
6;
#endif
for (IndexType j = 0; j < NumOutputChunks; ++j) for (IndexType j = 0; j < NumOutputChunks; ++j)
{ {
const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero); const vec_t sum0a =
const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero); vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero), shift);
const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero); const vec_t sum0b =
const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero); vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero), shift);
const vec_t sum1a = vec_min_16(in1[j * 2 + 0], One);
const vec_t sum1b = vec_min_16(in1[j * 2 + 1], One);
const vec_t pa = vec_mul_16(sum0a, sum1a); const vec_t pa = vec_mulhi_16(sum0a, sum1a);
const vec_t pb = vec_mul_16(sum0b, sum1b); const vec_t pb = vec_mulhi_16(sum0b, sum1b);
out[j] = vec_msb_pack_16(pa, pb); out[j] = vec_packus_16(pa, pb);
} }
#else #else
@ -359,9 +434,9 @@ class FeatureTransformer {
BiasType sum0 = accumulation[static_cast<int>(perspectives[p])][j + 0]; BiasType sum0 = accumulation[static_cast<int>(perspectives[p])][j + 0];
BiasType sum1 = BiasType sum1 =
accumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2]; accumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2];
sum0 = std::clamp<BiasType>(sum0, 0, 127); sum0 = std::clamp<BiasType>(sum0, 0, 127 * 2);
sum1 = std::clamp<BiasType>(sum1, 0, 127); sum1 = std::clamp<BiasType>(sum1, 0, 127 * 2);
output[offset + j] = static_cast<OutputType>(unsigned(sum0 * sum1) / 128); output[offset + j] = static_cast<OutputType>(unsigned(sum0 * sum1) / 512);
} }
#endif #endif
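The arithmetic described in the long comment above can be sanity-checked in isolation. The following standalone program (a sketch under the stated assumptions, not engine code) compares the naive clip-multiply-divide-by-512 path against the SSE-style clip, shift-left-by-7, mulhi, packus path and asserts they agree over a test range of operands:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
    for (int a = -600; a <= 600; ++a)
        for (int b = -600; b <= 600; ++b)
        {
            // Naive path: clip both operands to [0, 254] (doubled weights),
            // multiply, divide by 512, saturate to the uint8 output range.
            int ca    = std::clamp(a, 0, 254);
            int cb    = std::clamp(b, 0, 254);
            int naive = std::clamp(ca * cb / 512, 0, 255);

            // Fast path: clip 'a' on both sides and shift it left by 7,
            // clip 'b' only from above, take the high 16 bits of the product
            // (what mulhi does; '>> 16' is an arithmetic shift here), then
            // rely on packus-style saturation to zero out negative results.
            std::int16_t sa   = static_cast<std::int16_t>(std::clamp(a, 0, 254) << 7);
            std::int16_t sb   = static_cast<std::int16_t>(std::min(b, 254));
            int          hi   = (int(sa) * int(sb)) >> 16;
            int          fast = std::clamp(hi, 0, 255);

            assert(naive == fast);
        }
    return 0;
}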
@ -378,11 +453,10 @@ class FeatureTransformer {
private: private:
template<Color Perspective> template<Color Perspective>
[[nodiscard]] std::pair<StateInfo*, StateInfo*> StateInfo* try_find_computed_accumulator(const Position& pos) const {
try_find_computed_accumulator(const Position& pos) const {
// Look for a usable accumulator of an earlier position. We keep track // Look for a usable accumulator of an earlier position. We keep track
// of the estimated gain in terms of features to be added/subtracted. // of the estimated gain in terms of features to be added/subtracted.
StateInfo *st = pos.state(), *next = nullptr; StateInfo* st = pos.state();
int gain = FeatureSet::refresh_cost(pos); int gain = FeatureSet::refresh_cost(pos);
while (st->previous && !(st->*accPtr).computed[Perspective]) while (st->previous && !(st->*accPtr).computed[Perspective])
{ {
@ -391,236 +465,205 @@ class FeatureTransformer {
if (FeatureSet::requires_refresh(st, Perspective) if (FeatureSet::requires_refresh(st, Perspective)
|| (gain -= FeatureSet::update_cost(st) + 1) < 0) || (gain -= FeatureSet::update_cost(st) + 1) < 0)
break; break;
next = st; st = st->previous;
st = st->previous;
} }
return {st, next}; return st;
} }
// NOTE: The parameter states_to_update is an array of position states. // It computes the accumulator of the next position, or updates the
// All states must be sequential, that is states_to_update[i] must either be reachable // current position's accumulator if CurrentOnly is true.
// by repeatedly applying ->previous from states_to_update[i+1]. template<Color Perspective, bool CurrentOnly>
// computed_st must be reachable by repeatedly applying ->previous on void update_accumulator_incremental(const Position& pos, StateInfo* computed) const {
// states_to_update[0]. assert((computed->*accPtr).computed[Perspective]);
template<Color Perspective, size_t N> assert(computed->next != nullptr);
void update_accumulator_incremental(const Position& pos,
StateInfo* computed_st,
StateInfo* states_to_update[N]) const {
static_assert(N > 0);
assert([&]() {
for (size_t i = 0; i < N; ++i)
{
if (states_to_update[i] == nullptr)
return false;
}
return true;
}());
#ifdef VECTOR #ifdef VECTOR
// Gcc-10.2 unnecessarily spills AVX2 registers if this array // Gcc-10.2 unnecessarily spills AVX2 registers if this array
// is defined in the VECTOR code below, once in each branch // is defined in the VECTOR code below, once in each branch.
vec_t acc[NumRegs]; vec_t acc[NumRegs];
psqt_vec_t psqt[NumPsqtRegs]; psqt_vec_t psqt[NumPsqtRegs];
#endif #endif
// Update incrementally going back through states_to_update.
// Gather all features to be updated.
const Square ksq = pos.square<KING>(Perspective); const Square ksq = pos.square<KING>(Perspective);
// The size must be enough to contain the largest possible update. // The size must be enough to contain the largest possible update.
// That might depend on the feature set and generally relies on the // That might depend on the feature set and generally relies on the
// feature set's update cost calculation to be correct and never allow // feature set's update cost calculation to be correct and never allow
// updates with more added/removed features than MaxActiveDimensions. // updates with more added/removed features than MaxActiveDimensions.
FeatureSet::IndexList removed[N], added[N]; FeatureSet::IndexList removed, added;
for (int i = N - 1; i >= 0; --i) if constexpr (CurrentOnly)
{ for (StateInfo* st = pos.state(); st != computed; st = st->previous)
(states_to_update[i]->*accPtr).computed[Perspective] = true; FeatureSet::append_changed_indices<Perspective>(ksq, st->dirtyPiece, removed,
added);
else
FeatureSet::append_changed_indices<Perspective>(ksq, computed->next->dirtyPiece,
removed, added);
const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1]; StateInfo* next = CurrentOnly ? pos.state() : computed->next;
assert(!(next->*accPtr).computed[Perspective]);
for (StateInfo* st2 = states_to_update[i]; st2 != end_state; st2 = st2->previous)
FeatureSet::append_changed_indices<Perspective>(ksq, st2->dirtyPiece, removed[i],
added[i]);
}
StateInfo* st = computed_st;
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
#ifdef VECTOR #ifdef VECTOR
if ((removed.size() == 1 || removed.size() == 2) && added.size() == 1)
if (N == 1 && (removed[0].size() == 1 || removed[0].size() == 2) && added[0].size() == 1)
{ {
assert(states_to_update[0]);
auto accIn = auto accIn =
reinterpret_cast<const vec_t*>(&(st->*accPtr).accumulation[Perspective][0]); reinterpret_cast<const vec_t*>(&(computed->*accPtr).accumulation[Perspective][0]);
auto accOut = reinterpret_cast<vec_t*>( auto accOut = reinterpret_cast<vec_t*>(&(next->*accPtr).accumulation[Perspective][0]);
&(states_to_update[0]->*accPtr).accumulation[Perspective][0]);
const IndexType offsetR0 = HalfDimensions * removed[0][0]; const IndexType offsetR0 = HalfDimensions * removed[0];
auto columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]); auto columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
const IndexType offsetA = HalfDimensions * added[0][0]; const IndexType offsetA = HalfDimensions * added[0];
auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]); auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
if (removed[0].size() == 1) if (removed.size() == 1)
{ {
for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
++k) accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA[i]);
accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]);
} }
else else
{ {
const IndexType offsetR1 = HalfDimensions * removed[0][1]; const IndexType offsetR1 = HalfDimensions * removed[1];
auto columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]); auto columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
++k) accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA[i]),
accOut[k] = vec_sub_16(vec_add_16(accIn[k], columnA[k]), vec_add_16(columnR0[i], columnR1[i]));
vec_add_16(columnR0[k], columnR1[k]));
} }
auto accPsqtIn = auto accPsqtIn = reinterpret_cast<const psqt_vec_t*>(
reinterpret_cast<const psqt_vec_t*>(&(st->*accPtr).psqtAccumulation[Perspective][0]); &(computed->*accPtr).psqtAccumulation[Perspective][0]);
auto accPsqtOut = reinterpret_cast<psqt_vec_t*>( auto accPsqtOut =
&(states_to_update[0]->*accPtr).psqtAccumulation[Perspective][0]); reinterpret_cast<psqt_vec_t*>(&(next->*accPtr).psqtAccumulation[Perspective][0]);
const IndexType offsetPsqtR0 = PSQTBuckets * removed[0][0]; const IndexType offsetPsqtR0 = PSQTBuckets * removed[0];
auto columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]); auto columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]);
const IndexType offsetPsqtA = PSQTBuckets * added[0][0]; const IndexType offsetPsqtA = PSQTBuckets * added[0];
auto columnPsqtA = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA]); auto columnPsqtA = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA]);
if (removed[0].size() == 1) if (removed.size() == 1)
{ {
for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); for (std::size_t i = 0;
++k) i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
accPsqtOut[k] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[k], columnPsqtR0[k]), accPsqtOut[i] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]),
columnPsqtA[k]); columnPsqtA[i]);
} }
else else
{ {
const IndexType offsetPsqtR1 = PSQTBuckets * removed[0][1]; const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
auto columnPsqtR1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]); auto columnPsqtR1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);
for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); for (std::size_t i = 0;
++k) i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
accPsqtOut[k] = accPsqtOut[i] =
vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[k], columnPsqtA[k]), vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA[i]),
vec_add_psqt_32(columnPsqtR0[k], columnPsqtR1[k])); vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i]));
} }
} }
else else
{ {
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) for (IndexType i = 0; i < HalfDimensions / TileHeight; ++i)
{ {
// Load accumulator // Load accumulator
auto accTileIn = reinterpret_cast<const vec_t*>( auto accTileIn = reinterpret_cast<const vec_t*>(
&(st->*accPtr).accumulation[Perspective][j * TileHeight]); &(computed->*accPtr).accumulation[Perspective][i * TileHeight]);
for (IndexType k = 0; k < NumRegs; ++k) for (IndexType j = 0; j < NumRegs; ++j)
acc[k] = vec_load(&accTileIn[k]); acc[j] = vec_load(&accTileIn[j]);
for (IndexType i = 0; i < N; ++i) // Difference calculation for the deactivated features
for (const auto index : removed)
{ {
// Difference calculation for the deactivated features const IndexType offset = HalfDimensions * index + i * TileHeight;
for (const auto index : removed[i]) auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
{ for (IndexType j = 0; j < NumRegs; ++j)
const IndexType offset = HalfDimensions * index + j * TileHeight; acc[j] = vec_sub_16(acc[j], column[j]);
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
for (IndexType k = 0; k < NumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
// Difference calculation for the activated features
for (const auto index : added[i])
{
const IndexType offset = HalfDimensions * index + j * TileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
for (IndexType k = 0; k < NumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
// Store accumulator
auto accTileOut = reinterpret_cast<vec_t*>(
&(states_to_update[i]->*accPtr).accumulation[Perspective][j * TileHeight]);
for (IndexType k = 0; k < NumRegs; ++k)
vec_store(&accTileOut[k], acc[k]);
} }
// Difference calculation for the activated features
for (const auto index : added)
{
const IndexType offset = HalfDimensions * index + i * TileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
for (IndexType j = 0; j < NumRegs; ++j)
acc[j] = vec_add_16(acc[j], column[j]);
}
// Store accumulator
auto accTileOut = reinterpret_cast<vec_t*>(
&(next->*accPtr).accumulation[Perspective][i * TileHeight]);
for (IndexType j = 0; j < NumRegs; ++j)
vec_store(&accTileOut[j], acc[j]);
} }
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) for (IndexType i = 0; i < PSQTBuckets / PsqtTileHeight; ++i)
{ {
// Load accumulator // Load accumulator
auto accTilePsqtIn = reinterpret_cast<const psqt_vec_t*>( auto accTilePsqtIn = reinterpret_cast<const psqt_vec_t*>(
&(st->*accPtr).psqtAccumulation[Perspective][j * PsqtTileHeight]); &(computed->*accPtr).psqtAccumulation[Perspective][i * PsqtTileHeight]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k) for (std::size_t j = 0; j < NumPsqtRegs; ++j)
psqt[k] = vec_load_psqt(&accTilePsqtIn[k]); psqt[j] = vec_load_psqt(&accTilePsqtIn[j]);
for (IndexType i = 0; i < N; ++i) // Difference calculation for the deactivated features
for (const auto index : removed)
{ {
// Difference calculation for the deactivated features const IndexType offset = PSQTBuckets * index + i * PsqtTileHeight;
for (const auto index : removed[i]) auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
{ for (std::size_t j = 0; j < NumPsqtRegs; ++j)
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; psqt[j] = vec_sub_psqt_32(psqt[j], columnPsqt[j]);
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
}
// Difference calculation for the activated features
for (const auto index : added[i])
{
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
}
// Store accumulator
auto accTilePsqtOut = reinterpret_cast<psqt_vec_t*>(
&(states_to_update[i]->*accPtr)
.psqtAccumulation[Perspective][j * PsqtTileHeight]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&accTilePsqtOut[k], psqt[k]);
} }
// Difference calculation for the activated features
for (const auto index : added)
{
const IndexType offset = PSQTBuckets * index + i * PsqtTileHeight;
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
for (std::size_t j = 0; j < NumPsqtRegs; ++j)
psqt[j] = vec_add_psqt_32(psqt[j], columnPsqt[j]);
}
// Store accumulator
auto accTilePsqtOut = reinterpret_cast<psqt_vec_t*>(
&(next->*accPtr).psqtAccumulation[Perspective][i * PsqtTileHeight]);
for (std::size_t j = 0; j < NumPsqtRegs; ++j)
vec_store_psqt(&accTilePsqtOut[j], psqt[j]);
} }
} }
#else #else
for (IndexType i = 0; i < N; ++i) std::memcpy((next->*accPtr).accumulation[Perspective],
(computed->*accPtr).accumulation[Perspective],
HalfDimensions * sizeof(BiasType));
std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
(computed->*accPtr).psqtAccumulation[Perspective],
PSQTBuckets * sizeof(PSQTWeightType));
// Difference calculation for the deactivated features
for (const auto index : removed)
{ {
std::memcpy((states_to_update[i]->*accPtr).accumulation[Perspective], const IndexType offset = HalfDimensions * index;
(st->*accPtr).accumulation[Perspective], HalfDimensions * sizeof(BiasType)); for (IndexType i = 0; i < HalfDimensions; ++i)
(next->*accPtr).accumulation[Perspective][i] -= weights[offset + i];
for (std::size_t k = 0; k < PSQTBuckets; ++k) for (std::size_t i = 0; i < PSQTBuckets; ++i)
(states_to_update[i]->*accPtr).psqtAccumulation[Perspective][k] = (next->*accPtr).psqtAccumulation[Perspective][i] -=
(st->*accPtr).psqtAccumulation[Perspective][k]; psqtWeights[index * PSQTBuckets + i];
}
st = states_to_update[i]; // Difference calculation for the activated features
for (const auto index : added)
{
const IndexType offset = HalfDimensions * index;
for (IndexType i = 0; i < HalfDimensions; ++i)
(next->*accPtr).accumulation[Perspective][i] += weights[offset + i];
// Difference calculation for the deactivated features for (std::size_t i = 0; i < PSQTBuckets; ++i)
for (const auto index : removed[i]) (next->*accPtr).psqtAccumulation[Perspective][i] +=
{ psqtWeights[index * PSQTBuckets + i];
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
(st->*accPtr).accumulation[Perspective][j] -= weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
(st->*accPtr).psqtAccumulation[Perspective][k] -=
psqtWeights[index * PSQTBuckets + k];
}
// Difference calculation for the activated features
for (const auto index : added[i])
{
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
(st->*accPtr).accumulation[Perspective][j] += weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
(st->*accPtr).psqtAccumulation[Perspective][k] +=
psqtWeights[index * PSQTBuckets + k];
}
} }
#endif #endif
(next->*accPtr).computed[Perspective] = true;
if (!CurrentOnly && next != pos.state())
update_accumulator_incremental<Perspective, false>(pos, next);
} }
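As a toy model of the control flow selected by CurrentOnly (simplified types, not engine code): with CurrentOnly == false the recursion above fills in one accumulator per StateInfo while walking forward to the current position, which is equivalent to the iterative sketch below; with CurrentOnly == true only the current position's accumulator is written, after collecting every dirty piece back to the last computed state.

#include <cassert>
#include <cstddef>
#include <vector>

// Toy model: each State only records whether its "accumulator" is computed.
struct State {
    State* next     = nullptr;
    bool   computed = false;
};

int main() {
    std::vector<State> chain(6);  // oldest state ... current position
    for (std::size_t i = 0; i + 1 < chain.size(); ++i)
        chain[i].next = &chain[i + 1];
    chain.front().computed = true;  // the last known-good accumulator

    State* computed = &chain.front();
    State* current  = &chain.back();
    while (computed != current)     // walk forward one state at a time
    {
        // The real code applies computed->next's dirty pieces here
        // (features added/removed by that move) before marking it valid.
        computed->next->computed = true;
        computed = computed->next;
    }
    assert(current->computed);
    return 0;
}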
template<Color Perspective> template<Color Perspective>
@ -664,7 +707,10 @@ class FeatureTransformer {
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
{ {
auto accTile =
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
auto entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]); auto entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]);
for (IndexType k = 0; k < NumRegs; ++k) for (IndexType k = 0; k < NumRegs; ++k)
acc[k] = entryTile[k]; acc[k] = entryTile[k];
@ -679,7 +725,7 @@ class FeatureTransformer {
auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]); auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
for (unsigned k = 0; k < NumRegs; ++k) for (unsigned k = 0; k < NumRegs; ++k)
acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]), columnA[k]); acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
} }
for (; i < int(removed.size()); ++i) for (; i < int(removed.size()); ++i)
{ {
@ -702,12 +748,17 @@ class FeatureTransformer {
for (IndexType k = 0; k < NumRegs; k++) for (IndexType k = 0; k < NumRegs; k++)
vec_store(&entryTile[k], acc[k]); vec_store(&entryTile[k], acc[k]);
for (IndexType k = 0; k < NumRegs; k++)
vec_store(&accTile[k], acc[k]);
} }
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
{ {
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
&accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
auto entryTilePsqt = auto entryTilePsqt =
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * PsqtTileHeight]); reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * PsqtTileHeight]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k) for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = entryTilePsqt[k]; psqt[k] = entryTilePsqt[k];
@ -732,6 +783,8 @@ class FeatureTransformer {
for (std::size_t k = 0; k < NumPsqtRegs; ++k) for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&entryTilePsqt[k], psqt[k]); vec_store_psqt(&entryTilePsqt[k], psqt[k]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&accTilePsqt[k], psqt[k]);
} }
#else #else
@ -755,16 +808,15 @@ class FeatureTransformer {
entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k]; entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k];
} }
#endif
// The accumulator of the refresh entry has been updated. // The accumulator of the refresh entry has been updated.
// Now copy its content to the actual accumulator we were refreshing // Now copy its content to the actual accumulator we were refreshing.
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation, std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
sizeof(BiasType) * HalfDimensions); sizeof(BiasType) * HalfDimensions);
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation, std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
sizeof(int32_t) * PSQTBuckets); sizeof(int32_t) * PSQTBuckets);
#endif
for (Color c : {WHITE, BLACK}) for (Color c : {WHITE, BLACK})
entry.byColorBB[c] = pos.pieces(c); entry.byColorBB[c] = pos.pieces(c);
@ -786,14 +838,10 @@ class FeatureTransformer {
if ((pos.state()->*accPtr).computed[Perspective]) if ((pos.state()->*accPtr).computed[Perspective])
return; return;
auto [oldest_st, _] = try_find_computed_accumulator<Perspective>(pos); StateInfo* oldest = try_find_computed_accumulator<Perspective>(pos);
if ((oldest_st->*accPtr).computed[Perspective]) if ((oldest->*accPtr).computed[Perspective] && oldest != pos.state())
{ update_accumulator_incremental<Perspective, true>(pos, oldest);
// Only update current position accumulator to minimize work.
StateInfo* states_to_update[1] = {pos.state()};
update_accumulator_incremental<Perspective, 1>(pos, oldest_st, states_to_update);
}
else else
update_accumulator_refresh_cache<Perspective>(pos, cache); update_accumulator_refresh_cache<Perspective>(pos, cache);
} }
@ -802,31 +850,12 @@ class FeatureTransformer {
void update_accumulator(const Position& pos, void update_accumulator(const Position& pos,
AccumulatorCaches::Cache<HalfDimensions>* cache) const { AccumulatorCaches::Cache<HalfDimensions>* cache) const {
auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos); StateInfo* oldest = try_find_computed_accumulator<Perspective>(pos);
if ((oldest_st->*accPtr).computed[Perspective]) if ((oldest->*accPtr).computed[Perspective] && oldest != pos.state())
{ // Start from the oldest computed accumulator, update all the
if (next == nullptr) // accumulators up to the current position.
return; update_accumulator_incremental<Perspective, false>(pos, oldest);
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
// Currently we update 2 accumulators.
// 1. for the current position
// 2. the next accumulator after the computed one
// The heuristic may change in the future.
if (next == pos.state())
{
StateInfo* states_to_update[1] = {next};
update_accumulator_incremental<Perspective, 1>(pos, oldest_st, states_to_update);
}
else
{
StateInfo* states_to_update[2] = {next, pos.state()};
update_accumulator_incremental<Perspective, 2>(pos, oldest_st, states_to_update);
}
}
else else
update_accumulator_refresh_cache<Perspective>(pos, cache); update_accumulator_refresh_cache<Perspective>(pos, cache);
} }
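The rewritten incremental path above replaces the explicit states_to_update[] array with a walk along the new StateInfo::next links: find the closest ancestor whose accumulator is already computed, then update one state per step until the current position is reached (or stop early when CurrentOnly is set). A minimal stand-alone sketch of that traversal, using stand-in types rather than Stockfish's real StateInfo and accumulators:

#include <cassert>
#include <vector>

// Stand-in for StateInfo: only the fields the traversal needs.
struct Node {
    bool  computed = false;  // accumulator already up to date?
    Node* previous = nullptr;
    Node* next     = nullptr;
};

// Walk back until a node with a computed accumulator is found
// (the real code falls back to a full cache refresh if none exists).
Node* find_oldest_computed(Node* current) {
    Node* n = current;
    while (!n->computed && n->previous)
        n = n->previous;
    return n;
}

// "Update" every node from the computed ancestor up to `current`,
// mirroring update_accumulator_incremental's recursion over st->next.
void update_chain(Node* computed, Node* current) {
    assert(computed->computed);
    for (Node* n = computed; n != current;)
    {
        n = n->next;         // the position right after the last computed one
        n->computed = true;  // the real code applies added/removed feature deltas here
    }
}

int main() {
    std::vector<Node> chain(4);
    for (size_t i = 1; i < chain.size(); ++i)
    {
        chain[i].previous = &chain[i - 1];
        chain[i - 1].next = &chain[i];
    }
    chain[0].computed = true;  // root accumulator computed at search start

    Node* current = &chain.back();
    update_chain(find_oldest_computed(current), current);
    assert(current->computed);
}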

View file

@ -28,6 +28,7 @@
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <string_view> #include <string_view>
#include <tuple>
#include "../evaluate.h" #include "../evaluate.h"
#include "../position.h" #include "../position.h"
@ -45,9 +46,7 @@ constexpr std::string_view PieceToChar(" PNBRQK pnbrqk");
void hint_common_parent_position(const Position& pos, void hint_common_parent_position(const Position& pos,
const Networks& networks, const Networks& networks,
AccumulatorCaches& caches) { AccumulatorCaches& caches) {
if (Eval::use_smallnet(pos))
int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move()));
if (simpleEvalAbs > Eval::SmallNetThreshold)
networks.small.hint_common_access(pos, &caches.small); networks.small.hint_common_access(pos, &caches.small);
else else
networks.big.hint_common_access(pos, &caches.big); networks.big.hint_common_access(pos, &caches.big);
@ -127,14 +126,15 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+'; board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+';
if (pc != NO_PIECE) if (pc != NO_PIECE)
board[y + 1][x + 4] = PieceToChar[pc]; board[y + 1][x + 4] = PieceToChar[pc];
if (value != VALUE_NONE) if (is_valid(value))
format_cp_compact(value, &board[y + 2][x + 2], pos); format_cp_compact(value, &board[y + 2][x + 2], pos);
}; };
// We estimate the value of each piece by doing a differential evaluation from // We estimate the value of each piece by doing a differential evaluation from
// the current base eval, simulating the removal of the piece from its square. // the current base eval, simulating the removal of the piece from its square.
Value base = networks.big.evaluate(pos, &caches.big); auto [psqt, positional] = networks.big.evaluate(pos, &caches.big);
base = pos.side_to_move() == WHITE ? base : -base; Value base = psqt + positional;
base = pos.side_to_move() == WHITE ? base : -base;
for (File f = FILE_A; f <= FILE_H; ++f) for (File f = FILE_A; f <= FILE_H; ++f)
for (Rank r = RANK_1; r <= RANK_8; ++r) for (Rank r = RANK_1; r <= RANK_8; ++r)
@ -150,9 +150,10 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
pos.remove_piece(sq); pos.remove_piece(sq);
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false; st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
Value eval = networks.big.evaluate(pos, &caches.big); std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big);
eval = pos.side_to_move() == WHITE ? eval : -eval; Value eval = psqt + positional;
v = base - eval; eval = pos.side_to_move() == WHITE ? eval : -eval;
v = base - eval;
pos.put_piece(pc, sq); pos.put_piece(pc, sq);
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false; st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
@ -177,16 +178,16 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket)
{ {
ss << "| " << bucket << " "; ss << "| " << bucket << " " //
ss << " | "; << " | ";
format_cp_aligned_dot(t.psqt[bucket], ss, pos); format_cp_aligned_dot(t.psqt[bucket], ss, pos);
ss << " " ss << " " //
<< " | "; << " | ";
format_cp_aligned_dot(t.positional[bucket], ss, pos); format_cp_aligned_dot(t.positional[bucket], ss, pos);
ss << " " ss << " " //
<< " | "; << " | ";
format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos); format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos);
ss << " " ss << " " //
<< " |"; << " |";
if (bucket == t.correctBucket) if (bucket == t.correctBucket)
ss << " <-- this bucket is used"; ss << " <-- this bucket is used";

src/numa.h (new file, 1346 lines)

File diff suppressed because it is too large

View file

@ -334,8 +334,10 @@ void Position::set_check_info() const {
// The function is only used when a new position is set up // The function is only used when a new position is set up
void Position::set_state() const { void Position::set_state() const {
st->key = st->materialKey = 0; st->key = st->materialKey = 0;
st->pawnKey = Zobrist::noPawns; st->majorPieceKey = st->minorPieceKey = 0;
st->nonPawnKey[WHITE] = st->nonPawnKey[BLACK] = 0;
st->pawnKey = Zobrist::noPawns;
st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO; st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO;
st->checkersBB = attackers_to(square<KING>(sideToMove)) & pieces(~sideToMove); st->checkersBB = attackers_to(square<KING>(sideToMove)) & pieces(~sideToMove);
@ -350,8 +352,27 @@ void Position::set_state() const {
if (type_of(pc) == PAWN) if (type_of(pc) == PAWN)
st->pawnKey ^= Zobrist::psq[pc][s]; st->pawnKey ^= Zobrist::psq[pc][s];
else if (type_of(pc) != KING) else
st->nonPawnMaterial[color_of(pc)] += PieceValue[pc]; {
st->nonPawnKey[color_of(pc)] ^= Zobrist::psq[pc][s];
if (type_of(pc) != KING)
{
st->nonPawnMaterial[color_of(pc)] += PieceValue[pc];
if (type_of(pc) >= ROOK)
st->majorPieceKey ^= Zobrist::psq[pc][s];
else
st->minorPieceKey ^= Zobrist::psq[pc][s];
}
else
{
st->majorPieceKey ^= Zobrist::psq[pc][s];
st->minorPieceKey ^= Zobrist::psq[pc][s];
}
}
} }
if (st->epSquare != SQ_NONE) if (st->epSquare != SQ_NONE)
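The loop above spreads each piece's Zobrist contribution over the new keys: pawns into pawnKey, every non-pawn into nonPawnKey[color], rooks and queens into majorPieceKey, knights and bishops into minorPieceKey, and kings into both piece-type keys. A compact stand-alone summary of that classification, using a simplified PieceType rather than Stockfish's own types (illustrative only):

#include <iostream>
#include <string>

enum PieceType { PAWN, KNIGHT, BISHOP, ROOK, QUEEN, KING };

// Which of the new keys a piece on the board contributes to,
// following the branches in Position::set_state() above.
std::string keys_touched(PieceType pt) {
    if (pt == PAWN)
        return "pawnKey";

    std::string keys = "nonPawnKey[color]";
    if (pt == KING)
        keys += " + majorPieceKey + minorPieceKey";
    else if (pt >= ROOK)
        keys += " + majorPieceKey";
    else
        keys += " + minorPieceKey";
    return keys;
}

int main() {
    const char* names[] = {"pawn", "knight", "bishop", "rook", "queen", "king"};
    for (PieceType pt : {PAWN, KNIGHT, BISHOP, ROOK, QUEEN, KING})
        std::cout << names[pt] << ": " << keys_touched(pt) << '\n';
}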
@ -671,6 +692,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
// our state pointer to point to the new (ready to be updated) state. // our state pointer to point to the new (ready to be updated) state.
std::memcpy(&newSt, st, offsetof(StateInfo, key)); std::memcpy(&newSt, st, offsetof(StateInfo, key));
newSt.previous = st; newSt.previous = st;
st->next = &newSt;
st = &newSt; st = &newSt;
// Increment ply counters. In particular, rule50 will be reset to zero later on // Increment ply counters. In particular, rule50 will be reset to zero later on
@ -706,6 +728,8 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
do_castling<true>(us, from, to, rfrom, rto); do_castling<true>(us, from, to, rfrom, rto);
k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
st->majorPieceKey ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
st->nonPawnKey[us] ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
captured = NO_PIECE; captured = NO_PIECE;
} }
@ -731,7 +755,16 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
st->pawnKey ^= Zobrist::psq[captured][capsq]; st->pawnKey ^= Zobrist::psq[captured][capsq];
} }
else else
{
st->nonPawnMaterial[them] -= PieceValue[captured]; st->nonPawnMaterial[them] -= PieceValue[captured];
st->nonPawnKey[them] ^= Zobrist::psq[captured][capsq];
if (type_of(captured) >= ROOK)
st->majorPieceKey ^= Zobrist::psq[captured][capsq];
else
st->minorPieceKey ^= Zobrist::psq[captured][capsq];
}
dp.dirty_num = 2; // 1 piece moved, 1 piece captured dp.dirty_num = 2; // 1 piece moved, 1 piece captured
dp.piece[1] = captured; dp.piece[1] = captured;
@ -789,7 +822,8 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
else if (m.type_of() == PROMOTION) else if (m.type_of() == PROMOTION)
{ {
Piece promotion = make_piece(us, m.promotion_type()); Piece promotion = make_piece(us, m.promotion_type());
PieceType promotionType = type_of(promotion);
assert(relative_rank(us, to) == RANK_8); assert(relative_rank(us, to) == RANK_8);
assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN); assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN);
@ -810,6 +844,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
st->materialKey ^= st->materialKey ^=
Zobrist::psq[promotion][pieceCount[promotion] - 1] ^ Zobrist::psq[pc][pieceCount[pc]]; Zobrist::psq[promotion][pieceCount[promotion] - 1] ^ Zobrist::psq[pc][pieceCount[pc]];
if (promotionType >= ROOK)
st->majorPieceKey ^= Zobrist::psq[promotion][to];
else
st->minorPieceKey ^= Zobrist::psq[promotion][to];
// Update material // Update material
st->nonPawnMaterial[us] += PieceValue[promotion]; st->nonPawnMaterial[us] += PieceValue[promotion];
} }
@ -821,6 +861,23 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
st->rule50 = 0; st->rule50 = 0;
} }
else
{
st->nonPawnKey[us] ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
if (type_of(pc) == KING)
{
st->majorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
st->minorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
}
else if (type_of(pc) >= ROOK)
st->majorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
else
st->minorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
}
// Set capture piece // Set capture piece
st->capturedPiece = captured; st->capturedPiece = captured;
@ -963,6 +1020,7 @@ void Position::do_null_move(StateInfo& newSt, TranspositionTable& tt) {
std::memcpy(&newSt, st, offsetof(StateInfo, accumulatorBig)); std::memcpy(&newSt, st, offsetof(StateInfo, accumulatorBig));
newSt.previous = st; newSt.previous = st;
st->next = &newSt;
st = &newSt; st = &newSt;
st->dirtyPiece.dirty_num = 0; st->dirtyPiece.dirty_num = 0;
@ -1156,9 +1214,9 @@ bool Position::has_repeated() const {
} }
// Tests if the position has a move which draws by repetition, // Tests if the position has a move which draws by repetition.
// or an earlier position has a move that directly reaches the current position. // This function accurately matches the outcome of is_draw() over all legal moves.
bool Position::has_game_cycle(int ply) const { bool Position::upcoming_repetition(int ply) const {
int j; int j;
@ -1169,10 +1227,16 @@ bool Position::has_game_cycle(int ply) const {
Key originalKey = st->key; Key originalKey = st->key;
StateInfo* stp = st->previous; StateInfo* stp = st->previous;
Key other = originalKey ^ stp->key ^ Zobrist::side;
for (int i = 3; i <= end; i += 2) for (int i = 3; i <= end; i += 2)
{ {
stp = stp->previous->previous; stp = stp->previous;
other ^= stp->key ^ stp->previous->key ^ Zobrist::side;
stp = stp->previous;
if (other != 0)
continue;
Key moveKey = originalKey ^ stp->key; Key moveKey = originalKey ^ stp->key;
if ((j = H1(moveKey), cuckoo[j] == moveKey) || (j = H2(moveKey), cuckoo[j] == moveKey)) if ((j = H1(moveKey), cuckoo[j] == moveKey) || (j = H2(moveKey), cuckoo[j] == moveKey))
@ -1188,12 +1252,6 @@ bool Position::has_game_cycle(int ply) const {
// For nodes before or at the root, check that the move is a // For nodes before or at the root, check that the move is a
// repetition rather than a move to the current position. // repetition rather than a move to the current position.
// In the cuckoo table, both moves Rc1c5 and Rc5c1 are stored in
// the same location, so we have to select which square to check.
if (color_of(piece_on(empty(s1) ? s2 : s1)) != side_to_move())
continue;
// For repetitions before or at the root, require one more
if (stp->repetition) if (stp->repetition)
return true; return true;
} }
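In the rewritten loop, `other` accumulates the XOR of the Zobrist keys of the intervening plies; only when those differences cancel to zero can originalKey ^ stp->key correspond to a single reversible move, which is then looked up in the cuckoo tables. A toy demonstration of why that XOR difference identifies a move, using made-up Zobrist values rather than Stockfish's tables:

#include <cstdint>
#include <random>

// The hash signature of a reversible move (piece from square 12 to square 20,
// side to move flipped) is the same whichever position it is played from,
// so key_before ^ key_after identifies the move itself.
int main() {
    std::mt19937_64 rng(20240101);
    uint64_t psq[2][64];
    for (auto& side : psq)
        for (auto& sq : side)
            sq = rng();
    uint64_t sideKey = rng();

    uint64_t pos1 = psq[0][12] ^ psq[1][50];                    // some position, white to move
    uint64_t pos2 = pos1 ^ psq[0][12] ^ psq[0][20] ^ sideKey;   // white plays 12 -> 20

    uint64_t moveKey = pos1 ^ pos2;  // == psq[0][12] ^ psq[0][20] ^ sideKey
    // The cuckoo tables store exactly these move signatures, so a match of
    // moveKey against cuckoo[H1(moveKey)] or cuckoo[H2(moveKey)] means a single
    // reversible move connects the two positions.
    return moveKey == (psq[0][12] ^ psq[0][20] ^ sideKey) ? 0 : 1;
}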

View file

@ -43,6 +43,9 @@ struct StateInfo {
// Copied when making a move // Copied when making a move
Key materialKey; Key materialKey;
Key pawnKey; Key pawnKey;
Key majorPieceKey;
Key minorPieceKey;
Key nonPawnKey[COLOR_NB];
Value nonPawnMaterial[COLOR_NB]; Value nonPawnMaterial[COLOR_NB];
int castlingRights; int castlingRights;
int rule50; int rule50;
@ -53,6 +56,7 @@ struct StateInfo {
Key key; Key key;
Bitboard checkersBB; Bitboard checkersBB;
StateInfo* previous; StateInfo* previous;
StateInfo* next;
Bitboard blockersForKing[COLOR_NB]; Bitboard blockersForKing[COLOR_NB];
Bitboard pinners[COLOR_NB]; Bitboard pinners[COLOR_NB];
Bitboard checkSquares[PIECE_TYPE_NB]; Bitboard checkSquares[PIECE_TYPE_NB];
@ -150,13 +154,16 @@ class Position {
Key key_after(Move m) const; Key key_after(Move m) const;
Key material_key() const; Key material_key() const;
Key pawn_key() const; Key pawn_key() const;
Key major_piece_key() const;
Key minor_piece_key() const;
Key non_pawn_key(Color c) const;
// Other properties of the position // Other properties of the position
Color side_to_move() const; Color side_to_move() const;
int game_ply() const; int game_ply() const;
bool is_chess960() const; bool is_chess960() const;
bool is_draw(int ply) const; bool is_draw(int ply) const;
bool has_game_cycle(int ply) const; bool upcoming_repetition(int ply) const;
bool has_repeated() const; bool has_repeated() const;
int rule50_count() const; int rule50_count() const;
Value non_pawn_material(Color c) const; Value non_pawn_material(Color c) const;
@ -297,6 +304,12 @@ inline Key Position::pawn_key() const { return st->pawnKey; }
inline Key Position::material_key() const { return st->materialKey; } inline Key Position::material_key() const { return st->materialKey; }
inline Key Position::major_piece_key() const { return st->majorPieceKey; }
inline Key Position::minor_piece_key() const { return st->minorPieceKey; }
inline Key Position::non_pawn_key(Color c) const { return st->nonPawnKey[c]; }
inline Value Position::non_pawn_material(Color c) const { return st->nonPawnMaterial[c]; } inline Value Position::non_pawn_material(Color c) const { return st->nonPawnMaterial[c]; }
inline Value Position::non_pawn_material() const { inline Value Position::non_pawn_material() const {
@ -315,8 +328,8 @@ inline bool Position::capture(Move m) const {
} }
// Returns true if a move is generated from the capture stage, having also // Returns true if a move is generated from the capture stage, having also
// queen promotions covered, i.e. consistency with the capture stage move generation // queen promotions covered, i.e. consistency with the capture stage move
// is needed to avoid the generation of duplicate moves. // generation is needed to avoid the generation of duplicate moves.
inline bool Position::capture_stage(Move m) const { inline bool Position::capture_stage(Move m) const {
assert(m.is_ok()); assert(m.is_ok());
return capture(m) || m.promotion_type() == QUEEN; return capture(m) || m.promotion_type() == QUEEN;

View file

@ -29,7 +29,7 @@ namespace Stockfish {
Score::Score(Value v, const Position& pos) { Score::Score(Value v, const Position& pos) {
assert(-VALUE_INFINITE < v && v < VALUE_INFINITE); assert(-VALUE_INFINITE < v && v < VALUE_INFINITE);
if (std::abs(v) < VALUE_TB_WIN_IN_MAX_PLY) if (!is_decisive(v))
{ {
score = InternalUnits{UCIEngine::to_cp(v, pos)}; score = InternalUnits{UCIEngine::to_cp(v, pos)};
} }

File diff suppressed because it is too large

View file

@ -19,6 +19,7 @@
#ifndef SEARCH_H_INCLUDED #ifndef SEARCH_H_INCLUDED
#define SEARCH_H_INCLUDED #define SEARCH_H_INCLUDED
#include <algorithm>
#include <array> #include <array>
#include <atomic> #include <atomic>
#include <cassert> #include <cassert>
@ -30,21 +31,19 @@
#include <string_view> #include <string_view>
#include <vector> #include <vector>
#include "history.h"
#include "misc.h" #include "misc.h"
#include "movepick.h" #include "nnue/network.h"
#include "nnue/nnue_accumulator.h"
#include "numa.h"
#include "position.h" #include "position.h"
#include "score.h" #include "score.h"
#include "syzygy/tbprobe.h" #include "syzygy/tbprobe.h"
#include "timeman.h" #include "timeman.h"
#include "types.h" #include "types.h"
#include "nnue/nnue_accumulator.h"
namespace Stockfish { namespace Stockfish {
namespace Eval::NNUE {
struct Networks;
}
// Different node types, used as a template parameter // Different node types, used as a template parameter
enum NodeType { enum NodeType {
NonPV, NonPV,
@ -62,19 +61,19 @@ namespace Search {
// shallower and deeper in the tree during the search. Each search thread has // shallower and deeper in the tree during the search. Each search thread has
// its own array of Stack objects, indexed by the current ply. // its own array of Stack objects, indexed by the current ply.
struct Stack { struct Stack {
Move* pv; Move* pv;
PieceToHistory* continuationHistory; PieceToHistory* continuationHistory;
int ply; CorrectionHistory<PieceTo>* continuationCorrectionHistory;
Move currentMove; int ply;
Move excludedMove; Move currentMove;
Move killers[2]; Move excludedMove;
Value staticEval; Value staticEval;
int statScore; int statScore;
int moveCount; int moveCount;
bool inCheck; bool inCheck;
bool ttPv; bool ttPv;
bool ttHit; bool ttHit;
int cutoffCnt; int cutoffCnt;
}; };
@ -92,15 +91,16 @@ struct RootMove {
return m.score != score ? m.score < score : m.previousScore < previousScore; return m.score != score ? m.score < score : m.previousScore < previousScore;
} }
uint64_t effort = 0; uint64_t effort = 0;
Value score = -VALUE_INFINITE; Value score = -VALUE_INFINITE;
Value previousScore = -VALUE_INFINITE; Value previousScore = -VALUE_INFINITE;
Value averageScore = -VALUE_INFINITE; Value averageScore = -VALUE_INFINITE;
Value uciScore = -VALUE_INFINITE; Value meanSquaredScore = -VALUE_INFINITE * VALUE_INFINITE;
bool scoreLowerbound = false; Value uciScore = -VALUE_INFINITE;
bool scoreUpperbound = false; bool scoreLowerbound = false;
int selDepth = 0; bool scoreUpperbound = false;
int tbRank = 0; int selDepth = 0;
int tbRank = 0;
Value tbScore; Value tbScore;
std::vector<Move> pv; std::vector<Move> pv;
}; };
@ -133,19 +133,19 @@ struct LimitsType {
// The UCI stores the uci options, thread pool, and transposition table. // The UCI stores the uci options, thread pool, and transposition table.
// This struct is used to easily forward data to the Search::Worker class. // This struct is used to easily forward data to the Search::Worker class.
struct SharedState { struct SharedState {
SharedState(const OptionsMap& optionsMap, SharedState(const OptionsMap& optionsMap,
ThreadPool& threadPool, ThreadPool& threadPool,
TranspositionTable& transpositionTable, TranspositionTable& transpositionTable,
const Eval::NNUE::Networks& nets) : const LazyNumaReplicated<Eval::NNUE::Networks>& nets) :
options(optionsMap), options(optionsMap),
threads(threadPool), threads(threadPool),
tt(transpositionTable), tt(transpositionTable),
networks(nets) {} networks(nets) {}
const OptionsMap& options; const OptionsMap& options;
ThreadPool& threads; ThreadPool& threads;
TranspositionTable& tt; TranspositionTable& tt;
const Eval::NNUE::Networks& networks; const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
}; };
class Worker; class Worker;
@ -182,6 +182,34 @@ struct InfoIteration {
size_t currmovenumber; size_t currmovenumber;
}; };
// Skill structure is used to implement strength limit. If we have a UCI_Elo,
// we convert it to an appropriate skill level, anchored to the Stash engine.
// This method is based on a fit of the Elo results for games played between
// Stockfish at various skill levels and various versions of the Stash engine.
// Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately
// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2
struct Skill {
// Lowest and highest Elo ratings used in the skill level calculation
constexpr static int LowestElo = 1320;
constexpr static int HighestElo = 3190;
Skill(int skill_level, int uci_elo) {
if (uci_elo)
{
double e = double(uci_elo - LowestElo) / (HighestElo - LowestElo);
level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0);
}
else
level = double(skill_level);
}
bool enabled() const { return level < 20.0; }
bool time_to_pick(Depth depth) const { return depth == 1 + int(level); }
Move pick_best(const RootMoves&, size_t multiPV);
double level;
Move best = Move::none();
};
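For example, with UCI_Elo set to 2000 the fit gives e ≈ 0.364 and a level of roughly 4.18, so Skill::time_to_pick() fires once depth 1 + int(level) = 5 has been searched. A stand-alone check of that arithmetic:

#include <algorithm>
#include <iostream>

// Worked example of the Elo -> skill-level fit used by the Skill struct above.
int main() {
    constexpr int LowestElo  = 1320;
    constexpr int HighestElo = 3190;

    int    uci_elo = 2000;  // example input, not a special value
    double e       = double(uci_elo - LowestElo) / (HighestElo - LowestElo);
    double level =
      std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0);

    std::cout << "level = " << level << ", picks at depth " << 1 + int(level) << '\n';
}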
// SearchManager manages the search from the main thread. It is responsible for // SearchManager manages the search from the main thread. It is responsible for
// keeping track of the time, and storing data strictly related to the main thread. // keeping track of the time, and storing data strictly related to the main thread.
class SearchManager: public ISearchManager { class SearchManager: public ISearchManager {
@ -204,12 +232,13 @@ class SearchManager: public ISearchManager {
void check_time(Search::Worker& worker) override; void check_time(Search::Worker& worker) override;
void pv(const Search::Worker& worker, void pv(Search::Worker& worker,
const ThreadPool& threads, const ThreadPool& threads,
const TranspositionTable& tt, const TranspositionTable& tt,
Depth depth) const; Depth depth);
Stockfish::TimeManagement tm; Stockfish::TimeManagement tm;
double originalTimeAdjust;
int callsCnt; int callsCnt;
std::atomic_bool ponder; std::atomic_bool ponder;
@ -235,47 +264,55 @@ class NullSearchManager: public ISearchManager {
// of the search history, and storing data required for the search. // of the search history, and storing data required for the search.
class Worker { class Worker {
public: public:
Worker(SharedState&, std::unique_ptr<ISearchManager>, size_t); Worker(SharedState&, std::unique_ptr<ISearchManager>, size_t, NumaReplicatedAccessToken);
// Called at instantiation to initialize Reductions tables // Called at instantiation to initialize reductions tables.
// Reset histories, usually before a new game // Reset histories, usually before a new game.
void clear(); void clear();
// Called when the program receives the UCI 'go' command. // Called when the program receives the UCI 'go' command.
// It searches from the root position and outputs the "bestmove". // It searches from the root position and outputs the "bestmove".
void start_searching(); void start_searching();
bool is_mainthread() const { return thread_idx == 0; } bool is_mainthread() const { return threadIdx == 0; }
void ensure_network_replicated();
// Public because they need to be updatable by the stats // Public because they need to be updatable by the stats
CounterMoveHistory counterMoves; ButterflyHistory mainHistory;
ButterflyHistory mainHistory; LowPlyHistory lowPlyHistory;
CapturePieceToHistory captureHistory; CapturePieceToHistory captureHistory;
ContinuationHistory continuationHistory[2][2]; ContinuationHistory continuationHistory[2][2];
PawnHistory pawnHistory; PawnHistory pawnHistory;
CorrectionHistory correctionHistory;
CorrectionHistory<Pawn> pawnCorrectionHistory;
CorrectionHistory<Major> majorPieceCorrectionHistory;
CorrectionHistory<Minor> minorPieceCorrectionHistory;
CorrectionHistory<NonPawn> nonPawnCorrectionHistory[COLOR_NB];
CorrectionHistory<Continuation> continuationCorrectionHistory;
private: private:
void iterative_deepening(); void iterative_deepening();
// Main search function for both PV and non-PV nodes // This is the main search function, for both PV and non-PV nodes
template<NodeType nodeType> template<NodeType nodeType>
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
// Quiescence search function, which is called by the main search // Quiescence search function, which is called by the main search
template<NodeType nodeType> template<NodeType nodeType>
Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0); Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta);
Depth reduction(bool i, Depth d, int mn, int delta); Depth reduction(bool i, Depth d, int mn, int delta) const;
// Get a pointer to the search manager, only allowed to be called by the // Pointer to the search manager, only allowed to be called by the main thread
// main thread.
SearchManager* main_manager() const { SearchManager* main_manager() const {
assert(thread_idx == 0); assert(threadIdx == 0);
return static_cast<SearchManager*>(manager.get()); return static_cast<SearchManager*>(manager.get());
} }
TimePoint elapsed() const; TimePoint elapsed() const;
TimePoint elapsed_time() const;
LimitsType limits; LimitsType limits;
@ -291,7 +328,8 @@ class Worker {
Depth rootDepth, completedDepth; Depth rootDepth, completedDepth;
Value rootDelta; Value rootDelta;
size_t thread_idx; size_t threadIdx;
NumaReplicatedAccessToken numaAccessToken;
// Reductions lookup table initialized at startup // Reductions lookup table initialized at startup
std::array<int, MAX_MOVES> reductions; // [depth or moveNumber] std::array<int, MAX_MOVES> reductions; // [depth or moveNumber]
@ -301,10 +339,10 @@ class Worker {
Tablebases::Config tbConfig; Tablebases::Config tbConfig;
const OptionsMap& options; const OptionsMap& options;
ThreadPool& threads; ThreadPool& threads;
TranspositionTable& tt; TranspositionTable& tt;
const Eval::NNUE::Networks& networks; const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
// Used by NNUE // Used by NNUE
Eval::NNUE::AccumulatorCaches refreshTable; Eval::NNUE::AccumulatorCaches refreshTable;

View file

@ -66,7 +66,7 @@ namespace {
constexpr int TBPIECES = 7; // Max number of supported pieces constexpr int TBPIECES = 7; // Max number of supported pieces
constexpr int MAX_DTZ = constexpr int MAX_DTZ =
1 << 18; // Max DTZ supported, large enough to deal with the syzygy TB limit. 1 << 18; // Max DTZ supported times 2, large enough to deal with the syzygy TB limit.
enum { enum {
BigEndian, BigEndian,
@ -443,6 +443,8 @@ class TBTables {
std::deque<TBTable<WDL>> wdlTable; std::deque<TBTable<WDL>> wdlTable;
std::deque<TBTable<DTZ>> dtzTable; std::deque<TBTable<DTZ>> dtzTable;
size_t foundDTZFiles = 0;
size_t foundWDLFiles = 0;
void insert(Key key, TBTable<WDL>* wdl, TBTable<DTZ>* dtz) { void insert(Key key, TBTable<WDL>* wdl, TBTable<DTZ>* dtz) {
uint32_t homeBucket = uint32_t(key) & (Size - 1); uint32_t homeBucket = uint32_t(key) & (Size - 1);
@ -486,9 +488,16 @@ class TBTables {
memset(hashTable, 0, sizeof(hashTable)); memset(hashTable, 0, sizeof(hashTable));
wdlTable.clear(); wdlTable.clear();
dtzTable.clear(); dtzTable.clear();
foundDTZFiles = 0;
foundWDLFiles = 0;
} }
size_t size() const { return wdlTable.size(); }
void add(const std::vector<PieceType>& pieces); void info() const {
sync_cout << "info string Found " << foundWDLFiles << " WDL and " << foundDTZFiles
<< " DTZ tablebase files (up to " << MaxCardinality << "-man)." << sync_endl;
}
void add(const std::vector<PieceType>& pieces);
}; };
TBTables TBTables; TBTables TBTables;
@ -501,13 +510,22 @@ void TBTables::add(const std::vector<PieceType>& pieces) {
for (PieceType pt : pieces) for (PieceType pt : pieces)
code += PieceToChar[pt]; code += PieceToChar[pt];
code.insert(code.find('K', 1), "v");
TBFile file(code.insert(code.find('K', 1), "v") + ".rtbw"); // KRK -> KRvK TBFile file_dtz(code + ".rtbz"); // KRK -> KRvK
if (file_dtz.is_open())
{
file_dtz.close();
foundDTZFiles++;
}
TBFile file(code + ".rtbw"); // KRK -> KRvK
if (!file.is_open()) // Only WDL file is checked if (!file.is_open()) // Only WDL file is checked
return; return;
file.close(); file.close();
foundWDLFiles++;
MaxCardinality = std::max(int(pieces.size()), MaxCardinality); MaxCardinality = std::max(int(pieces.size()), MaxCardinality);
@ -1326,7 +1344,7 @@ void Tablebases::init(const std::string& paths) {
MaxCardinality = 0; MaxCardinality = 0;
TBFile::Paths = paths; TBFile::Paths = paths;
if (paths.empty() || paths == "<empty>") if (paths.empty())
return; return;
// MapB1H1H7[] encodes a square below a1-h8 diagonal to 0..27 // MapB1H1H7[] encodes a square below a1-h8 diagonal to 0..27
@ -1466,7 +1484,7 @@ void Tablebases::init(const std::string& paths) {
} }
} }
sync_cout << "info string Found " << TBTables.size() << " tablebases" << sync_endl; TBTables.info();
} }
// Probe the WDL table for a particular position. // Probe the WDL table for a particular position.
@ -1574,7 +1592,10 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) {
// Use the DTZ tables to rank root moves. // Use the DTZ tables to rank root moves.
// //
// A return value false indicates that not all probes were successful. // A return value false indicates that not all probes were successful.
bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50) { bool Tablebases::root_probe(Position& pos,
Search::RootMoves& rootMoves,
bool rule50,
bool rankDTZ) {
ProbeState result = OK; ProbeState result = OK;
StateInfo st; StateInfo st;
@ -1585,7 +1606,7 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool ru
// Check whether a position was repeated since the last zeroing move. // Check whether a position was repeated since the last zeroing move.
bool rep = pos.has_repeated(); bool rep = pos.has_repeated();
int dtz, bound = rule50 ? (MAX_DTZ - 100) : 1; int dtz, bound = rule50 ? (MAX_DTZ / 2 - 100) : 1;
// Probe and rank each move // Probe and rank each move
for (auto& m : rootMoves) for (auto& m : rootMoves)
@ -1624,8 +1645,10 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool ru
// Better moves are ranked higher. Certain wins are ranked equally. // Better moves are ranked higher. Certain wins are ranked equally.
// Losing moves are ranked equally unless a 50-move draw is in sight. // Losing moves are ranked equally unless a 50-move draw is in sight.
int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ : MAX_DTZ - (dtz + cnt50)) int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ - (rankDTZ ? dtz : 0)
: dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ : -MAX_DTZ + (-dtz + cnt50)) : MAX_DTZ / 2 - (dtz + cnt50))
: dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ - (rankDTZ ? dtz : 0)
: -MAX_DTZ / 2 + (-dtz + cnt50))
: 0; : 0;
m.tbRank = r; m.tbRank = r;
@ -1633,10 +1656,11 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool ru
// 1 cp to cursed wins and let it grow to 49 cp as the positions gets // 1 cp to cursed wins and let it grow to 49 cp as the positions gets
// closer to a real win. // closer to a real win.
m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1 m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1
: r > 0 ? Value((std::max(3, r - (MAX_DTZ - 200)) * int(PawnValue)) / 200) : r > 0 ? Value((std::max(3, r - (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200)
: r == 0 ? VALUE_DRAW : r == 0 ? VALUE_DRAW
: r > -bound ? Value((std::min(-3, r + (MAX_DTZ - 200)) * int(PawnValue)) / 200) : r > -bound
: -VALUE_MATE + MAX_PLY + 1; ? Value((std::min(-3, r + (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200)
: -VALUE_MATE + MAX_PLY + 1;
} }
return true; return true;
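A worked example of the re-scaled ranking above, run outside the engine with an assumed PawnValue of 208 (for illustration only): a clean win with dtz 8 and cnt50 4 ranks at MAX_DTZ and is reported as a proven win, while a cursed win with dtz 90 and cnt50 15 ranks just below the bound and maps to 98 internal units, just under half a pawn.

#include <algorithm>
#include <iostream>

int main() {
    constexpr int MAX_DTZ   = 1 << 18;
    constexpr int PawnValue = 208;  // assumed value, only for this illustration
    const bool    rule50 = true, rep = false, rankDTZ = false;
    const int     bound = rule50 ? (MAX_DTZ / 2 - 100) : 1;  // 130972

    auto rank = [&](int dtz, int cnt50) {
        return dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ - (rankDTZ ? dtz : 0)
                                                    : MAX_DTZ / 2 - (dtz + cnt50))
             : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ - (rankDTZ ? dtz : 0)
                                                 : -MAX_DTZ / 2 + (-dtz + cnt50))
                       : 0;
    };

    int rWin    = rank(8, 4);    // 262144: r >= bound, ranked as a proven win
    int rCursed = rank(90, 15);  // 130967: just below bound, only a small positive score
    int cursedScore = std::max(3, rCursed - (MAX_DTZ / 2 - 200)) * PawnValue / 200;  // 98

    std::cout << rWin << ' ' << bound << ' ' << rCursed << ' ' << cursedScore << '\n';
}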
@ -1683,7 +1707,8 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, boo
Config Tablebases::rank_root_moves(const OptionsMap& options, Config Tablebases::rank_root_moves(const OptionsMap& options,
Position& pos, Position& pos,
Search::RootMoves& rootMoves) { Search::RootMoves& rootMoves,
bool rankDTZ) {
Config config; Config config;
if (rootMoves.empty()) if (rootMoves.empty())
@ -1707,7 +1732,7 @@ Config Tablebases::rank_root_moves(const OptionsMap& options,
if (config.cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) if (config.cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
{ {
// Rank moves using DTZ tables // Rank moves using DTZ tables
config.rootInTB = root_probe(pos, rootMoves, options["Syzygy50MoveRule"]); config.rootInTB = root_probe(pos, rootMoves, options["Syzygy50MoveRule"], rankDTZ);
if (!config.rootInTB) if (!config.rootInTB)
{ {

View file

@ -66,9 +66,12 @@ extern int MaxCardinality;
void init(const std::string& paths); void init(const std::string& paths);
WDLScore probe_wdl(Position& pos, ProbeState* result); WDLScore probe_wdl(Position& pos, ProbeState* result);
int probe_dtz(Position& pos, ProbeState* result); int probe_dtz(Position& pos, ProbeState* result);
bool root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50); bool root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50, bool rankDTZ);
bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50); bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50);
Config rank_root_moves(const OptionsMap& options, Position& pos, Search::RootMoves& rootMoves); Config rank_root_moves(const OptionsMap& options,
Position& pos,
Search::RootMoves& rootMoves,
bool rankDTZ = false);
} // namespace Stockfish::Tablebases } // namespace Stockfish::Tablebases
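Since rankDTZ defaults to false, existing call sites compile unchanged; a caller that wants moves ranked by exact DTZ (hypothetical here) passes the flag explicitly:

// Assuming `options`, `pos` and `rootMoves` are already set up by the caller:
Tablebases::Config cfg =
  Tablebases::rank_root_moves(options, pos, rootMoves, /*rankDTZ=*/true);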

View file

@ -22,19 +22,17 @@
#include <cassert> #include <cassert>
#include <deque> #include <deque>
#include <memory> #include <memory>
#include <string>
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include <string>
#include "misc.h"
#include "movegen.h" #include "movegen.h"
#include "search.h" #include "search.h"
#include "syzygy/tbprobe.h" #include "syzygy/tbprobe.h"
#include "timeman.h" #include "timeman.h"
#include "tt.h"
#include "types.h" #include "types.h"
#include "ucioption.h"
#include "uci.h" #include "uci.h"
#include "ucioption.h"
namespace Stockfish { namespace Stockfish {
@ -42,13 +40,24 @@ namespace Stockfish {
// in idle_loop(). Note that 'searching' and 'exit' should be already set. // in idle_loop(). Note that 'searching' and 'exit' should be already set.
Thread::Thread(Search::SharedState& sharedState, Thread::Thread(Search::SharedState& sharedState,
std::unique_ptr<Search::ISearchManager> sm, std::unique_ptr<Search::ISearchManager> sm,
size_t n) : size_t n,
worker(std::make_unique<Search::Worker>(sharedState, std::move(sm), n)), OptionalThreadToNumaNodeBinder binder) :
idx(n), idx(n),
nthreads(sharedState.options["Threads"]), nthreads(sharedState.options["Threads"]),
stdThread(&Thread::idle_loop, this) { stdThread(&Thread::idle_loop, this) {
wait_for_search_finished(); wait_for_search_finished();
run_custom_job([this, &binder, &sharedState, &sm, n]() {
// Use the binder to [maybe] bind the threads to a NUMA node before doing
// the Worker allocation. Ideally we would also allocate the SearchManager
// here, but that's minor.
this->numaAccessToken = binder();
this->worker =
std::make_unique<Search::Worker>(sharedState, std::move(sm), n, this->numaAccessToken);
});
wait_for_search_finished();
} }
@ -63,38 +72,42 @@ Thread::~Thread() {
stdThread.join(); stdThread.join();
} }
// Wakes up the thread that will start the search // Wakes up the thread that will start the search
void Thread::start_searching() { void Thread::start_searching() {
mutex.lock(); assert(worker != nullptr);
searching = true; run_custom_job([this]() { worker->start_searching(); });
mutex.unlock(); // Unlock before notifying saves a few CPU-cycles
cv.notify_one(); // Wake up the thread in idle_loop()
} }
// Clears the histories for the thread worker (usually before a new game)
void Thread::clear_worker() {
assert(worker != nullptr);
run_custom_job([this]() { worker->clear(); });
}
// Blocks on the condition variable // Blocks on the condition variable until the thread has finished searching
// until the thread has finished searching.
void Thread::wait_for_search_finished() { void Thread::wait_for_search_finished() {
std::unique_lock<std::mutex> lk(mutex); std::unique_lock<std::mutex> lk(mutex);
cv.wait(lk, [&] { return !searching; }); cv.wait(lk, [&] { return !searching; });
} }
// Launching a function in the thread
void Thread::run_custom_job(std::function<void()> f) {
{
std::unique_lock<std::mutex> lk(mutex);
cv.wait(lk, [&] { return !searching; });
jobFunc = std::move(f);
searching = true;
}
cv.notify_one();
}
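The jobFunc mechanism above is what the new pool-level helpers run_on_thread() and wait_on_thread() (added further down in this file) build on. A rough usage sketch, assuming a translation unit that includes thread.h and a ThreadPool that has already been set up:

// Hand an arbitrary callable to each worker thread, then block until all jobs finish.
void warm_up_all_workers(ThreadPool& threads) {
    for (size_t i = 0; i < threads.num_threads(); ++i)
        threads.run_on_thread(i, []() {
            // Any per-thread setup could go here; the job runs inside idle_loop().
        });

    for (size_t i = 0; i < threads.num_threads(); ++i)
        threads.wait_on_thread(i);
}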
// Thread gets parked here, blocked on the void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); }
// condition variable, when it has no work to do.
// Thread gets parked here, blocked on the condition variable
// when the thread has no work to do.
void Thread::idle_loop() { void Thread::idle_loop() {
// If OS already scheduled us on a different group than 0 then don't overwrite
// the choice, eventually we are one of many one-threaded processes running on
// some Windows NUMA hardware, for instance in fishtest. To make it simple,
// just check if running threads are below a threshold, in this case, all this
// NUMA machinery is not needed.
if (nthreads > 8)
WinProcGroup::bind_this_thread(idx);
while (true) while (true)
{ {
std::unique_lock<std::mutex> lk(mutex); std::unique_lock<std::mutex> lk(mutex);
@ -105,15 +118,17 @@ void Thread::idle_loop() {
if (exit) if (exit)
return; return;
std::function<void()> job = std::move(jobFunc);
jobFunc = nullptr;
lk.unlock(); lk.unlock();
worker->start_searching(); if (job)
job();
} }
} }
Search::SearchManager* ThreadPool::main_manager() { Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); }
return static_cast<Search::SearchManager*>(main_thread()->worker.get()->manager.get());
}
uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); } uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); }
uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); } uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); }
@ -121,59 +136,107 @@ uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits
// Creates/destroys threads to match the requested number. // Creates/destroys threads to match the requested number.
// Created and launched threads will immediately go to sleep in idle_loop. // Created and launched threads will immediately go to sleep in idle_loop.
// Upon resizing, threads are recreated to allow for binding if necessary. // Upon resizing, threads are recreated to allow for binding if necessary.
void ThreadPool::set(Search::SharedState sharedState, void ThreadPool::set(const NumaConfig& numaConfig,
Search::SharedState sharedState,
const Search::SearchManager::UpdateContext& updateContext) { const Search::SearchManager::UpdateContext& updateContext) {
if (threads.size() > 0) // destroy any existing thread(s) if (threads.size() > 0) // destroy any existing thread(s)
{ {
main_thread()->wait_for_search_finished(); main_thread()->wait_for_search_finished();
while (threads.size() > 0) threads.clear();
delete threads.back(), threads.pop_back();
boundThreadToNumaNode.clear();
} }
const size_t requested = sharedState.options["Threads"]; const size_t requested = sharedState.options["Threads"];
if (requested > 0) // create new thread(s) if (requested > 0) // create new thread(s)
{ {
auto manager = std::make_unique<Search::SearchManager>(updateContext); // Binding threads may be problematic when there's multiple NUMA nodes and
threads.push_back(new Thread(sharedState, std::move(manager), 0)); // multiple Stockfish instances running. In particular, if each instance
// runs a single thread then they would all be mapped to the first NUMA node.
// This is undesirable, and so the default behaviour (i.e. when the user does not
// change the NumaConfig UCI setting) is to not bind the threads to processors
// unless we know for sure that we span NUMA nodes and replication is required.
const std::string numaPolicy(sharedState.options["NumaPolicy"]);
const bool doBindThreads = [&]() {
if (numaPolicy == "none")
return false;
if (numaPolicy == "auto")
return numaConfig.suggests_binding_threads(requested);
// numaPolicy == "system", or explicitly set by the user
return true;
}();
boundThreadToNumaNode = doBindThreads
? numaConfig.distribute_threads_among_numa_nodes(requested)
: std::vector<NumaIndex>{};
while (threads.size() < requested) while (threads.size() < requested)
{ {
auto null_manager = std::make_unique<Search::NullSearchManager>(); const size_t threadId = threads.size();
threads.push_back(new Thread(sharedState, std::move(null_manager), threads.size())); const NumaIndex numaId = doBindThreads ? boundThreadToNumaNode[threadId] : 0;
auto manager = threadId == 0 ? std::unique_ptr<Search::ISearchManager>(
std::make_unique<Search::SearchManager>(updateContext))
: std::make_unique<Search::NullSearchManager>();
// When not binding threads we want to force all access to happen
// from the same NUMA node, because in case of NUMA replicated memory
// accesses we don't want to trash cache in case the threads get scheduled
// on the same NUMA node.
auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId)
: OptionalThreadToNumaNodeBinder(numaId);
threads.emplace_back(
std::make_unique<Thread>(sharedState, std::move(manager), threadId, binder));
} }
clear(); clear();
main_thread()->wait_for_search_finished(); main_thread()->wait_for_search_finished();
// Reallocate the hash with the new threadpool size
sharedState.tt.resize(sharedState.options["Hash"], requested);
} }
} }
// Sets threadPool data to initial values // Sets threadPool data to initial values
void ThreadPool::clear() { void ThreadPool::clear() {
for (Thread* th : threads)
th->worker->clear();
if (threads.size() == 0) if (threads.size() == 0)
return; return;
main_manager()->callsCnt = 0; for (auto&& th : threads)
main_manager()->bestPreviousScore = VALUE_INFINITE; th->clear_worker();
for (auto&& th : threads)
th->wait_for_search_finished();
// These two affect the time taken on the first move of a game:
main_manager()->bestPreviousAverageScore = VALUE_INFINITE; main_manager()->bestPreviousAverageScore = VALUE_INFINITE;
main_manager()->previousTimeReduction = 1.0; main_manager()->previousTimeReduction = 0.85;
main_manager()->callsCnt = 0;
main_manager()->bestPreviousScore = VALUE_INFINITE;
main_manager()->originalTimeAdjust = -1;
main_manager()->tm.clear(); main_manager()->tm.clear();
} }
void ThreadPool::run_on_thread(size_t threadId, std::function<void()> f) {
assert(threads.size() > threadId);
threads[threadId]->run_custom_job(std::move(f));
}
// Wakes up main thread waiting in idle_loop() and void ThreadPool::wait_on_thread(size_t threadId) {
// returns immediately. Main thread will wake up other threads and start the search. assert(threads.size() > threadId);
threads[threadId]->wait_for_search_finished();
}
size_t ThreadPool::num_threads() const { return threads.size(); }
// Wakes up main thread waiting in idle_loop() and returns immediately.
// Main thread will wake up other threads and start the search.
void ThreadPool::start_thinking(const OptionsMap& options, void ThreadPool::start_thinking(const OptionsMap& options,
Position& pos, Position& pos,
StateListPtr& states, StateListPtr& states,
@ -213,33 +276,38 @@ void ThreadPool::start_thinking(const OptionsMap& options,
// We use Position::set() to set root position across threads. But there are // We use Position::set() to set root position across threads. But there are
// some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
// be deduced from a fen string, so set() clears them and they are set from // be deduced from a fen string, so set() clears them and they are set from
// setupStates->back() later. The rootState is per thread, earlier states are shared // setupStates->back() later. The rootState is per thread, earlier states are
// since they are read-only. // shared since they are read-only.
for (Thread* th : threads) for (auto&& th : threads)
{ {
th->worker->limits = limits; th->run_custom_job([&]() {
th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly = th->worker->limits = limits;
th->worker->bestMoveChanges = 0; th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly =
th->worker->rootDepth = th->worker->completedDepth = 0; th->worker->bestMoveChanges = 0;
th->worker->rootMoves = rootMoves; th->worker->rootDepth = th->worker->completedDepth = 0;
th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState); th->worker->rootMoves = rootMoves;
th->worker->rootState = setupStates->back(); th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
th->worker->tbConfig = tbConfig; th->worker->rootState = setupStates->back();
th->worker->tbConfig = tbConfig;
});
} }
for (auto&& th : threads)
th->wait_for_search_finished();
main_thread()->start_searching(); main_thread()->start_searching();
} }
Thread* ThreadPool::get_best_thread() const { Thread* ThreadPool::get_best_thread() const {
Thread* bestThread = threads.front(); Thread* bestThread = threads.front().get();
Value minScore = VALUE_NONE; Value minScore = VALUE_NONE;
std::unordered_map<Move, int64_t, Move::MoveHash> votes( std::unordered_map<Move, int64_t, Move::MoveHash> votes(
2 * std::min(size(), bestThread->worker->rootMoves.size())); 2 * std::min(size(), bestThread->worker->rootMoves.size()));
// Find the minimum score of all threads // Find the minimum score of all threads
for (Thread* th : threads) for (auto&& th : threads)
minScore = std::min(minScore, th->worker->rootMoves[0].score); minScore = std::min(minScore, th->worker->rootMoves[0].score);
// Vote according to score and depth, and select the best thread // Vote according to score and depth, and select the best thread
@ -247,10 +315,10 @@ Thread* ThreadPool::get_best_thread() const {
return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth); return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth);
}; };
for (Thread* th : threads) for (auto&& th : threads)
votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th); votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get());
for (Thread* th : threads) for (auto&& th : threads)
{ {
const auto bestThreadScore = bestThread->worker->rootMoves[0].score; const auto bestThreadScore = bestThread->worker->rootMoves[0].score;
const auto newThreadScore = th->worker->rootMoves[0].score; const auto newThreadScore = th->worker->rootMoves[0].score;
@ -261,59 +329,82 @@ Thread* ThreadPool::get_best_thread() const {
const auto bestThreadMoveVote = votes[bestThreadPV[0]]; const auto bestThreadMoveVote = votes[bestThreadPV[0]];
const auto newThreadMoveVote = votes[newThreadPV[0]]; const auto newThreadMoveVote = votes[newThreadPV[0]];
const bool bestThreadInProvenWin = bestThreadScore >= VALUE_TB_WIN_IN_MAX_PLY; const bool bestThreadInProvenWin = is_win(bestThreadScore);
const bool newThreadInProvenWin = newThreadScore >= VALUE_TB_WIN_IN_MAX_PLY; const bool newThreadInProvenWin = is_win(newThreadScore);
const bool bestThreadInProvenLoss = const bool bestThreadInProvenLoss =
bestThreadScore != -VALUE_INFINITE && bestThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY; bestThreadScore != -VALUE_INFINITE && is_loss(bestThreadScore);
const bool newThreadInProvenLoss = const bool newThreadInProvenLoss =
newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY; newThreadScore != -VALUE_INFINITE && is_loss(newThreadScore);
// Note that we make sure not to pick a thread with truncated-PV for better viewer experience. // We make sure not to pick a thread with truncated principal variation
const bool betterVotingValue = const bool betterVotingValue =
thread_voting_value(th) * int(newThreadPV.size() > 2) thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
> thread_voting_value(bestThread) * int(bestThreadPV.size() > 2); > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);
if (bestThreadInProvenWin) if (bestThreadInProvenWin)
{ {
// Make sure we pick the shortest mate / TB conversion // Make sure we pick the shortest mate / TB conversion
if (newThreadScore > bestThreadScore) if (newThreadScore > bestThreadScore)
bestThread = th; bestThread = th.get();
} }
else if (bestThreadInProvenLoss) else if (bestThreadInProvenLoss)
{ {
// Make sure we pick the shortest mated / TB conversion // Make sure we pick the shortest mated / TB conversion
if (newThreadInProvenLoss && newThreadScore < bestThreadScore) if (newThreadInProvenLoss && newThreadScore < bestThreadScore)
bestThread = th; bestThread = th.get();
} }
else if (newThreadInProvenWin || newThreadInProvenLoss else if (newThreadInProvenWin || newThreadInProvenLoss
|| (newThreadScore > VALUE_TB_LOSS_IN_MAX_PLY || (!is_loss(newThreadScore)
&& (newThreadMoveVote > bestThreadMoveVote && (newThreadMoveVote > bestThreadMoveVote
|| (newThreadMoveVote == bestThreadMoveVote && betterVotingValue)))) || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue))))
bestThread = th; bestThread = th.get();
} }
return bestThread; return bestThread;
} }
// Start non-main threads // Start non-main threads.
// Will be invoked by main thread after it has started searching // Will be invoked by main thread after it has started searching.
void ThreadPool::start_searching() { void ThreadPool::start_searching() {
for (Thread* th : threads) for (auto&& th : threads)
if (th != threads.front()) if (th != threads.front())
th->start_searching(); th->start_searching();
} }
// Wait for non-main threads // Wait for non-main threads
void ThreadPool::wait_for_search_finished() const { void ThreadPool::wait_for_search_finished() const {
for (Thread* th : threads) for (auto&& th : threads)
if (th != threads.front()) if (th != threads.front())
th->wait_for_search_finished(); th->wait_for_search_finished();
} }
std::vector<size_t> ThreadPool::get_bound_thread_count_by_numa_node() const {
std::vector<size_t> counts;
if (!boundThreadToNumaNode.empty())
{
NumaIndex highestNumaNode = 0;
for (NumaIndex n : boundThreadToNumaNode)
if (n > highestNumaNode)
highestNumaNode = n;
counts.resize(highestNumaNode + 1, 0);
for (NumaIndex n : boundThreadToNumaNode)
counts[n] += 1;
}
return counts;
}
void ThreadPool::ensure_network_replicated() {
for (auto&& th : threads)
th->ensure_network_replicated();
}
} // namespace Stockfish } // namespace Stockfish

View file

@ -23,10 +23,12 @@
#include <condition_variable> #include <condition_variable>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <functional>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <vector> #include <vector>
#include "numa.h"
#include "position.h" #include "position.h"
#include "search.h" #include "search.h"
#include "thread_win32_osx.h" #include "thread_win32_osx.h"
@ -37,6 +39,32 @@ namespace Stockfish {
class OptionsMap; class OptionsMap;
using Value = int; using Value = int;
// Sometimes we don't want to actually bind the threads, but the recipient still
// needs to think it runs on *some* NUMA node, such that it can access structures
// that rely on NUMA node knowledge. This class encapsulates this optional process
// such that the recipient does not need to know whether the binding happened or not.
class OptionalThreadToNumaNodeBinder {
public:
OptionalThreadToNumaNodeBinder(NumaIndex n) :
numaConfig(nullptr),
numaId(n) {}
OptionalThreadToNumaNodeBinder(const NumaConfig& cfg, NumaIndex n) :
numaConfig(&cfg),
numaId(n) {}
NumaReplicatedAccessToken operator()() const {
if (numaConfig != nullptr)
return numaConfig->bind_current_thread_to_numa_node(numaId);
else
return NumaReplicatedAccessToken(numaId);
}
private:
const NumaConfig* numaConfig;
NumaIndex numaId;
};
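A rough usage sketch mirroring the thread-creation loop in thread.cpp, assuming the numa.h types are available; cfg, numaId and doBindThreads stand in for values computed by ThreadPool::set():

// With a NumaConfig the call operator really binds the current thread to the node
// and returns its access token; without one it only fabricates a token so that
// NUMA-replicated structures can still be indexed consistently.
NumaReplicatedAccessToken bind_if_requested(const NumaConfig& cfg, NumaIndex numaId,
                                            bool doBindThreads) {
    OptionalThreadToNumaNodeBinder binder =
      doBindThreads ? OptionalThreadToNumaNodeBinder(cfg, numaId)
                    : OptionalThreadToNumaNodeBinder(numaId);

    return binder();  // bind (or pretend to) and hand back the token
}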
// Abstraction of a thread. It contains a pointer to the worker and a native thread. // Abstraction of a thread. It contains a pointer to the worker and a native thread.
// After construction, the native thread is started with idle_loop() // After construction, the native thread is started with idle_loop()
// waiting for a signal to start searching. // waiting for a signal to start searching.
@ -44,22 +72,37 @@ using Value = int;
// the search is finished, it goes back to idle_loop() waiting for a new signal. // the search is finished, it goes back to idle_loop() waiting for a new signal.
class Thread { class Thread {
public: public:
Thread(Search::SharedState&, std::unique_ptr<Search::ISearchManager>, size_t); Thread(Search::SharedState&,
std::unique_ptr<Search::ISearchManager>,
size_t,
OptionalThreadToNumaNodeBinder);
virtual ~Thread(); virtual ~Thread();
void idle_loop(); void idle_loop();
void start_searching(); void start_searching();
void clear_worker();
void run_custom_job(std::function<void()> f);
void ensure_network_replicated();
// Thread has been slightly altered to allow running custom jobs, so
// this name is no longer correct. However, this class (and ThreadPool)
// require further work to make them properly generic while maintaining
// appropriate specificity regarding search, from the point of view of an
// outside user, so renaming of this function is left for whenever that happens.
void wait_for_search_finished(); void wait_for_search_finished();
size_t id() const { return idx; } size_t id() const { return idx; }
std::unique_ptr<Search::Worker> worker; std::unique_ptr<Search::Worker> worker;
std::function<void()> jobFunc;
private: private:
std::mutex mutex; std::mutex mutex;
std::condition_variable cv; std::condition_variable cv;
size_t idx, nthreads; size_t idx, nthreads;
bool exit = false, searching = true; // Set before starting std::thread bool exit = false, searching = true; // Set before starting std::thread
NativeThread stdThread; NativeThread stdThread;
NumaReplicatedAccessToken numaAccessToken;
}; };
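A rough sketch of how the new custom-job hooks are meant to be driven from the pool side (this mirrors what TranspositionTable::clear() does further down; parallel_zero and its arguments are made-up names):

#include <cstddef>
#include <cstring>

#include "thread.h"

// Hand each thread one slice of a buffer, then wait for all of them.
void parallel_zero(Stockfish::ThreadPool& pool, char* data, std::size_t size) {
    const std::size_t n      = pool.num_threads();
    const std::size_t stride = size / n;

    for (std::size_t i = 0; i < n; ++i)
        pool.run_on_thread(i, [=]() {
            const std::size_t start = stride * i;
            const std::size_t len   = i + 1 != n ? stride : size - start;
            std::memset(data + start, 0, len);  // each thread touches its own chunk
        });

    for (std::size_t i = 0; i < n; ++i)
        pool.wait_on_thread(i);
}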
@ -67,31 +110,46 @@ class Thread {
// parking and, most importantly, launching a thread. All the access to threads // parking and, most importantly, launching a thread. All the access to threads
// is done through this class. // is done through this class.
class ThreadPool { class ThreadPool {
public: public:
ThreadPool() {}
~ThreadPool() { ~ThreadPool() {
// destroy any existing thread(s) // destroy any existing thread(s)
if (threads.size() > 0) if (threads.size() > 0)
{ {
main_thread()->wait_for_search_finished(); main_thread()->wait_for_search_finished();
while (threads.size() > 0) threads.clear();
delete threads.back(), threads.pop_back();
} }
} }
void start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType); ThreadPool(const ThreadPool&) = delete;
void clear(); ThreadPool(ThreadPool&&) = delete;
void set(Search::SharedState, const Search::SearchManager::UpdateContext&);
ThreadPool& operator=(const ThreadPool&) = delete;
ThreadPool& operator=(ThreadPool&&) = delete;
void start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType);
void run_on_thread(size_t threadId, std::function<void()> f);
void wait_on_thread(size_t threadId);
size_t num_threads() const;
void clear();
void set(const NumaConfig& numaConfig,
Search::SharedState,
const Search::SearchManager::UpdateContext&);
Search::SearchManager* main_manager(); Search::SearchManager* main_manager();
Thread* main_thread() const { return threads.front(); } Thread* main_thread() const { return threads.front().get(); }
uint64_t nodes_searched() const; uint64_t nodes_searched() const;
uint64_t tb_hits() const; uint64_t tb_hits() const;
Thread* get_best_thread() const; Thread* get_best_thread() const;
void start_searching(); void start_searching();
void wait_for_search_finished() const; void wait_for_search_finished() const;
std::vector<size_t> get_bound_thread_count_by_numa_node() const;
void ensure_network_replicated();
std::atomic_bool stop, abortedSearch, increaseDepth; std::atomic_bool stop, abortedSearch, increaseDepth;
auto cbegin() const noexcept { return threads.cbegin(); } auto cbegin() const noexcept { return threads.cbegin(); }
@ -102,13 +160,14 @@ class ThreadPool {
auto empty() const noexcept { return threads.empty(); } auto empty() const noexcept { return threads.empty(); }
private: private:
StateListPtr setupStates; StateListPtr setupStates;
std::vector<Thread*> threads; std::vector<std::unique_ptr<Thread>> threads;
std::vector<NumaIndex> boundThreadToNumaNode;
uint64_t accumulate(std::atomic<uint64_t> Search::Worker::*member) const { uint64_t accumulate(std::atomic<uint64_t> Search::Worker::*member) const {
uint64_t sum = 0; uint64_t sum = 0;
for (Thread* th : threads) for (auto&& th : threads)
sum += (th->worker.get()->*member).load(std::memory_order_relaxed); sum += (th->worker.get()->*member).load(std::memory_order_relaxed);
return sum; return sum;
} }


@ -32,12 +32,12 @@ TimePoint TimeManagement::optimum() const { return optimumTime; }
TimePoint TimeManagement::maximum() const { return maximumTime; } TimePoint TimeManagement::maximum() const { return maximumTime; }
void TimeManagement::clear() { void TimeManagement::clear() {
availableNodes = 0; // When in 'nodes as time' mode availableNodes = -1; // When in 'nodes as time' mode
} }
void TimeManagement::advance_nodes_time(std::int64_t nodes) { void TimeManagement::advance_nodes_time(std::int64_t nodes) {
assert(useNodesTime); assert(useNodesTime);
availableNodes += nodes; availableNodes = std::max(int64_t(0), availableNodes - nodes);
} }
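Worked example of the new bookkeeping, assuming nodestime = 1000 and an initial wtime of 60000 ms: init() below seeds availableNodes = 1000 * 60000 = 60,000,000 once at game start (the -1 sentinel marks "not seeded yet"), and every subsequent init() uses availableNodes directly as the remaining "time". If the first move searches 2,500,000 nodes, advance_nodes_time() leaves 57,500,000; the std::max clamp keeps the budget from going negative when a move overshoots.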
// Called at the beginning of the search and calculates // Called at the beginning of the search and calculates
@ -47,15 +47,19 @@ void TimeManagement::advance_nodes_time(std::int64_t nodes) {
void TimeManagement::init(Search::LimitsType& limits, void TimeManagement::init(Search::LimitsType& limits,
Color us, Color us,
int ply, int ply,
const OptionsMap& options) { const OptionsMap& options,
// If we have no time, no need to initialize TM, except for the start time, double& originalTimeAdjust) {
// which is used by movetime. TimePoint npmsec = TimePoint(options["nodestime"]);
startTime = limits.startTime;
// If we have no time, we don't need to fully initialize TM.
// startTime is used by movetime and useNodesTime is used in elapsed calls.
startTime = limits.startTime;
useNodesTime = npmsec != 0;
if (limits.time[us] == 0) if (limits.time[us] == 0)
return; return;
TimePoint moveOverhead = TimePoint(options["Move Overhead"]); TimePoint moveOverhead = TimePoint(options["Move Overhead"]);
TimePoint npmsec = TimePoint(options["nodestime"]);
// optScale is a percentage of available time to use for the current move. // optScale is a percentage of available time to use for the current move.
// maxScale is a multiplier applied to optimumTime. // maxScale is a multiplier applied to optimumTime.
@ -65,26 +69,31 @@ void TimeManagement::init(Search::LimitsType& limits,
// to nodes, and use resulting values in time management formulas. // to nodes, and use resulting values in time management formulas.
// WARNING: to avoid time losses, the given npmsec (nodes per millisecond) // WARNING: to avoid time losses, the given npmsec (nodes per millisecond)
// must be much lower than the real engine speed. // must be much lower than the real engine speed.
if (npmsec) if (useNodesTime)
{ {
useNodesTime = true; if (availableNodes == -1) // Only once at game start
if (!availableNodes) // Only once at game start
availableNodes = npmsec * limits.time[us]; // Time is in msec availableNodes = npmsec * limits.time[us]; // Time is in msec
// Convert from milliseconds to nodes // Convert from milliseconds to nodes
limits.time[us] = TimePoint(availableNodes); limits.time[us] = TimePoint(availableNodes);
limits.inc[us] *= npmsec; limits.inc[us] *= npmsec;
limits.npmsec = npmsec; limits.npmsec = npmsec;
moveOverhead *= npmsec;
} }
// These numbers are used where multiplications, divisions or comparisons
// with constants are involved.
const int64_t scaleFactor = useNodesTime ? npmsec : 1;
const TimePoint scaledTime = limits.time[us] / scaleFactor;
const TimePoint scaledInc = limits.inc[us] / scaleFactor;
// Maximum move horizon of 50 moves // Maximum move horizon of 50 moves
int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50; int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50;
// if less than one second, gradually reduce mtg // If less than one second, gradually reduce mtg
if (limits.time[us] < 1000 && (double(mtg) / limits.time[us] > 0.05)) if (scaledTime < 1000 && double(mtg) / scaledInc > 0.05)
{ {
mtg = limits.time[us] * 0.05; mtg = scaledTime * 0.05;
} }
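For instance, in nodes-as-time mode with npmsec = 1000 the limits are already expressed in nodes, so scaleFactor = 1000 brings scaledTime and scaledInc back to a millisecond-like scale; the mtg reduction above and the log-based constants below therefore see the same magnitudes whether or not nodestime is in use. In normal play scaleFactor is 1 and nothing changes.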
// Make sure timeLeft is > 0 since we may use it as a divisor // Make sure timeLeft is > 0 since we may use it as a divisor
@ -96,24 +105,26 @@ void TimeManagement::init(Search::LimitsType& limits,
// game time for the current move, so also cap to a percentage of available game time. // game time for the current move, so also cap to a percentage of available game time.
if (limits.movestogo == 0) if (limits.movestogo == 0)
{ {
// Use extra time with larger increments // Extra time according to timeLeft
double optExtra = limits.inc[us] < 500 ? 1.0 : 1.13; if (originalTimeAdjust < 0)
originalTimeAdjust = 0.3285 * std::log10(timeLeft) - 0.4830;
// Calculate time constants based on current time left. // Calculate time constants based on current time left.
double optConstant = double logTimeInSec = std::log10(scaledTime / 1000.0);
std::min(0.00308 + 0.000319 * std::log10(limits.time[us] / 1000.0), 0.00506); double optConstant = std::min(0.00308 + 0.000319 * logTimeInSec, 0.00506);
double maxConstant = std::max(3.39 + 3.01 * std::log10(limits.time[us] / 1000.0), 2.93); double maxConstant = std::max(3.39 + 3.01 * logTimeInSec, 2.93);
optScale = std::min(0.0122 + std::pow(ply + 2.95, 0.462) * optConstant, optScale = std::min(0.0122 + std::pow(ply + 2.95, 0.462) * optConstant,
0.213 * limits.time[us] / double(timeLeft)) 0.213 * limits.time[us] / timeLeft)
* optExtra; * originalTimeAdjust;
maxScale = std::min(6.64, maxConstant + ply / 12.0); maxScale = std::min(6.64, maxConstant + ply / 12.0);
} }
// x moves in y seconds (+ z increment) // x moves in y seconds (+ z increment)
else else
{ {
optScale = std::min((0.88 + ply / 116.4) / mtg, 0.88 * limits.time[us] / double(timeLeft)); optScale = std::min((0.88 + ply / 116.4) / mtg, 0.88 * limits.time[us] / timeLeft);
maxScale = std::min(6.3, 1.5 + 0.11 * mtg); maxScale = std::min(6.3, 1.5 + 0.11 * mtg);
} }
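A concrete evaluation of the sudden-death branch, assuming roughly 60 s on the clock at ply 0 with no nodestime (so scaledTime = limits.time[us] = 60000 ms, and taking timeLeft ≈ 60000 ms as well; its exact definition sits outside this hunk): logTimeInSec = log10(60) ≈ 1.78, giving optConstant ≈ 0.00365 and maxConstant ≈ 8.74; originalTimeAdjust ≈ 0.3285 * log10(60000) - 0.4830 ≈ 1.09; then optScale ≈ (0.0122 + 2.95^0.462 * 0.00365) * 1.09 ≈ 0.020 (the 0.213 * time/timeLeft cap is not binding here) and maxScale = min(6.64, 8.74 + 0) = 6.64, i.e. about 2% of the remaining time is budgeted as the optimum for the first move.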


@ -36,14 +36,19 @@ struct LimitsType;
// the maximum available time, the game move number, and other parameters. // the maximum available time, the game move number, and other parameters.
class TimeManagement { class TimeManagement {
public: public:
void init(Search::LimitsType& limits, Color us, int ply, const OptionsMap& options); void init(Search::LimitsType& limits,
Color us,
int ply,
const OptionsMap& options,
double& originalTimeAdjust);
TimePoint optimum() const; TimePoint optimum() const;
TimePoint maximum() const; TimePoint maximum() const;
template<typename FUNC> template<typename FUNC>
TimePoint elapsed(FUNC nodes) const { TimePoint elapsed(FUNC nodes) const {
return useNodesTime ? TimePoint(nodes()) : now() - startTime; return useNodesTime ? TimePoint(nodes()) : elapsed_time();
} }
TimePoint elapsed_time() const { return now() - startTime; };
void clear(); void clear();
void advance_nodes_time(std::int64_t nodes); void advance_nodes_time(std::int64_t nodes);
@ -53,7 +58,7 @@ class TimeManagement {
TimePoint optimumTime; TimePoint optimumTime;
TimePoint maximumTime; TimePoint maximumTime;
std::int64_t availableNodes = 0; // When in 'nodes as time' mode std::int64_t availableNodes = -1; // When in 'nodes as time' mode
bool useNodesTime = false; // True if we are in 'nodes as time' mode bool useNodesTime = false; // True if we are in 'nodes as time' mode
}; };


@ -23,31 +23,89 @@
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <iostream> #include <iostream>
#include <thread>
#include <vector>
#include "memory.h"
#include "misc.h" #include "misc.h"
#include "syzygy/tbprobe.h"
#include "thread.h"
namespace Stockfish { namespace Stockfish {
// TTEntry struct is the 10 bytes transposition table entry, defined as below:
//
// key 16 bit
// depth 8 bit
// generation 5 bit
// pv node 1 bit
// bound type 2 bit
// move 16 bit
// value 16 bit
// evaluation 16 bit
//
// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially.
// Equally, the store order in save() matches this order.
struct TTEntry {
// Convert internal bitfields to external types
TTData read() const {
return TTData{Move(move16), Value(value16),
Value(eval16), Depth(depth8 + DEPTH_ENTRY_OFFSET),
Bound(genBound8 & 0x3), bool(genBound8 & 0x4)};
}
bool is_occupied() const;
void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
// The returned age is a multiple of TranspositionTable::GENERATION_DELTA
uint8_t relative_age(const uint8_t generation8) const;
private:
friend class TranspositionTable;
uint16_t key16;
uint8_t depth8;
uint8_t genBound8;
Move move16;
int16_t value16;
int16_t eval16;
};
// `genBound8` is where most of the details are. We use the following constants to manipulate 5 leading generation bits
// and 3 trailing miscellaneous bits.
// These bits are reserved for other things.
static constexpr unsigned GENERATION_BITS = 3;
// increment for generation field
static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS);
// cycle length
static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA;
// mask to pull out generation number
static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;
// DEPTH_ENTRY_OFFSET exists because 1) we use `bool(depth8)` as the occupancy check, but
// 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits":
// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted in `save`.)
bool TTEntry::is_occupied() const { return bool(depth8); }
// Populates the TTEntry with a new node's data, possibly // Populates the TTEntry with a new node's data, possibly
// overwriting an old position. The update is not atomic and can be racy. // overwriting an old position. The update is not atomic and can be racy.
void TTEntry::save( void TTEntry::save(
Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) { Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {
// Preserve any existing move for the same position // Preserve the old ttmove if we don't have a new one
if (m || uint16_t(k) != key16) if (m || uint16_t(k) != key16)
move16 = m; move16 = m;
// Overwrite less valuable entries (cheapest checks first) // Overwrite less valuable entries (cheapest checks first)
if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_OFFSET + 2 * pv > depth8 - 4 if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_ENTRY_OFFSET + 2 * pv > depth8 - 4
|| relative_age(generation8)) || relative_age(generation8))
{ {
assert(d > DEPTH_OFFSET); assert(d > DEPTH_ENTRY_OFFSET);
assert(d < 256 + DEPTH_OFFSET); assert(d < 256 + DEPTH_ENTRY_OFFSET);
key16 = uint16_t(k); key16 = uint16_t(k);
depth8 = uint8_t(d - DEPTH_OFFSET); depth8 = uint8_t(d - DEPTH_ENTRY_OFFSET);
genBound8 = uint8_t(generation8 | uint8_t(pv) << 2 | b); genBound8 = uint8_t(generation8 | uint8_t(pv) << 2 | b);
value16 = int16_t(v); value16 = int16_t(v);
eval16 = int16_t(ev); eval16 = int16_t(ev);
@ -61,70 +119,117 @@ uint8_t TTEntry::relative_age(const uint8_t generation8) const {
// is needed to keep the unrelated lowest n bits from affecting // is needed to keep the unrelated lowest n bits from affecting
// the result) to calculate the entry age correctly even after // the result) to calculate the entry age correctly even after
// generation8 overflows into the next cycle. // generation8 overflows into the next cycle.
return (GENERATION_CYCLE + generation8 - genBound8) & GENERATION_MASK;
return (TranspositionTable::GENERATION_CYCLE + generation8 - genBound8)
& TranspositionTable::GENERATION_MASK;
} }
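A quick worked example of the wrap-around, using the constants above (GENERATION_DELTA = 8, GENERATION_CYCLE = 263, GENERATION_MASK = 0xF8): suppose an entry was saved when the generation counter stood at 248 and a single new_search() has since wrapped it to 0. Then (263 + 0 - 248) & 0xF8 = 15 & 0xF8 = 8, exactly one GENERATION_DELTA, which is the correct age of one search despite the uint8_t overflow.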
// TTWriter is but a very thin wrapper around the pointer
TTWriter::TTWriter(TTEntry* tte) :
entry(tte) {}
void TTWriter::write(
Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {
entry->save(k, v, pv, b, d, m, ev, generation8);
}
// A TranspositionTable is an array of Cluster, of size clusterCount. Each cluster consists of ClusterSize number
// of TTEntry. Each non-empty TTEntry contains information on exactly one position. The size of a Cluster should
// divide the size of a cache line for best performance, as the cacheline is prefetched when possible.
static constexpr int ClusterSize = 3;
struct Cluster {
TTEntry entry[ClusterSize];
char padding[2]; // Pad to 32 bytes
};
static_assert(sizeof(Cluster) == 32, "Suboptimal Cluster size");
// Sets the size of the transposition table, // Sets the size of the transposition table,
// measured in megabytes. Transposition table consists // measured in megabytes. Transposition table consists
// of clusters and each cluster consists of ClusterSize number of TTEntry. // of clusters and each cluster consists of ClusterSize number of TTEntry.
void TranspositionTable::resize(size_t mbSize, int threadCount) { void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) {
aligned_large_pages_free(table); aligned_large_pages_free(table);
clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
table = static_cast<Cluster*>(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); table = static_cast<Cluster*>(aligned_large_pages_alloc(clusterCount * sizeof(Cluster)));
if (!table) if (!table)
{ {
std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl; std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
clear(threadCount); clear(threads);
} }
// Initializes the entire transposition table to zero, // Initializes the entire transposition table to zero,
// in a multi-threaded way. // in a multi-threaded way.
void TranspositionTable::clear(size_t threadCount) { void TranspositionTable::clear(ThreadPool& threads) {
std::vector<std::thread> threads; generation8 = 0;
const size_t threadCount = threads.num_threads();
for (size_t idx = 0; idx < size_t(threadCount); ++idx) for (size_t i = 0; i < threadCount; ++i)
{ {
threads.emplace_back([this, idx, threadCount]() { threads.run_on_thread(i, [this, i, threadCount]() {
// Thread binding gives faster search on systems with a first-touch policy
if (threadCount > 8)
WinProcGroup::bind_this_thread(idx);
// Each thread will zero its part of the hash table // Each thread will zero its part of the hash table
const size_t stride = size_t(clusterCount / threadCount), start = size_t(stride * idx), const size_t stride = clusterCount / threadCount;
len = idx != size_t(threadCount) - 1 ? stride : clusterCount - start; const size_t start = stride * i;
const size_t len = i + 1 != threadCount ? stride : clusterCount - start;
std::memset(&table[start], 0, len * sizeof(Cluster)); std::memset(&table[start], 0, len * sizeof(Cluster));
}); });
} }
for (std::thread& th : threads) for (size_t i = 0; i < threadCount; ++i)
th.join(); threads.wait_on_thread(i);
} }
// Returns an approximation of the hashtable
// occupation during a search. The hash is x permill full, as per UCI protocol.
// Only counts entries which match the current generation.
int TranspositionTable::hashfull(int maxAge) const {
int maxAgeInternal = maxAge << GENERATION_BITS;
int cnt = 0;
for (int i = 0; i < 1000; ++i)
for (int j = 0; j < ClusterSize; ++j)
cnt += table[i].entry[j].is_occupied()
&& table[i].entry[j].relative_age(generation8) <= maxAgeInternal;
return cnt / ClusterSize;
}
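Since relative_age() is always a multiple of GENERATION_DELTA = 8 and maxAgeInternal = maxAge << 3, hashfull(0) counts only entries written during the current search, while hashfull(999), as used by the speedtest benchmark later in this commit, accepts every occupied entry because 999 << 3 far exceeds GENERATION_MASK.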
void TranspositionTable::new_search() {
// increment by delta to keep lower bits as is
generation8 += GENERATION_DELTA;
}
uint8_t TranspositionTable::generation() const { return generation8; }
// Looks up the current position in the transposition // Looks up the current position in the transposition
// table. It returns true and a pointer to the TTEntry if the position is found. // table. It returns true if the position is found.
// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry // Otherwise, it returns false and a pointer to an empty or least valuable TTEntry
// to be replaced later. The replace value of an entry is calculated as its depth // to be replaced later. The replace value of an entry is calculated as its depth
// minus 8 times its relative age. TTEntry t1 is considered more valuable than // minus 8 times its relative age. TTEntry t1 is considered more valuable than
// TTEntry t2 if its replace value is greater than that of t2. // TTEntry t2 if its replace value is greater than that of t2.
TTEntry* TranspositionTable::probe(const Key key, bool& found) const { std::tuple<bool, TTData, TTWriter> TranspositionTable::probe(const Key key) const {
TTEntry* const tte = first_entry(key); TTEntry* const tte = first_entry(key);
const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster
for (int i = 0; i < ClusterSize; ++i) for (int i = 0; i < ClusterSize; ++i)
if (tte[i].key16 == key16 || !tte[i].depth8) if (tte[i].key16 == key16)
return found = bool(tte[i].depth8), &tte[i]; // This gap is the main place for read races.
// After `read()` completes that copy is final, but may be self-inconsistent.
return {tte[i].is_occupied(), tte[i].read(), TTWriter(&tte[i])};
// Find an entry to be replaced according to the replacement strategy // Find an entry to be replaced according to the replacement strategy
TTEntry* replace = tte; TTEntry* replace = tte;
@ -133,22 +238,12 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
> tte[i].depth8 - tte[i].relative_age(generation8) * 2) > tte[i].depth8 - tte[i].relative_age(generation8) * 2)
replace = &tte[i]; replace = &tte[i];
return found = false, replace; return {false, TTData(), TTWriter(replace)};
} }
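For orientation, a caller-side sketch of the new interface (key, depth and the helper names are placeholders; the real call sites live in search.cpp):

// Probe, use the local copy, and later store through the writer.
auto [ttHit, ttData, ttWriter] = tt.probe(key);

if (ttHit && ttData.depth >= depth)
    // ttData is a private copy, though possibly torn by the racy read.
    use_cached(ttData.value, ttData.move, ttData.bound);  // hypothetical helper

ttWriter.write(key, bestValue, pvNode, bound, depth, bestMove, staticEval,
               tt.generation());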
// Returns an approximation of the hashtable TTEntry* TranspositionTable::first_entry(const Key key) const {
// occupation during a search. The hash is x permill full, as per UCI protocol. return &table[mul_hi64(key, clusterCount)].entry[0];
// Only counts entries which match the current generation.
int TranspositionTable::hashfull() const {
int cnt = 0;
for (int i = 0; i < 1000; ++i)
for (int j = 0; j < ClusterSize; ++j)
cnt += table[i].entry[j].depth8
&& (table[i].entry[j].genBound8 & GENERATION_MASK) == generation8;
return cnt / ClusterSize;
} }
} // namespace Stockfish } // namespace Stockfish

src/tt.h

@ -21,99 +21,76 @@
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <tuple>
#include "misc.h" #include "memory.h"
#include "types.h" #include "types.h"
namespace Stockfish { namespace Stockfish {
// TTEntry struct is the 10 bytes transposition table entry, defined as below: class ThreadPool;
struct TTEntry;
struct Cluster;
// There is only one global hash table for the engine and all its threads. For chess in particular, we even allow racy
// updates between threads to and from the TT, as taking the time to synchronize access would cost thinking time and
// thus elo. As a hash table, collisions are possible and may cause chess playing issues (bizarre blunders, faulty mate
// reports, etc). Fixing these also loses elo; however such risk decreases quickly with larger TT size.
// //
// key 16 bit // `probe` is the primary method: given a board position, we lookup its entry in the table, and return a tuple of:
// depth 8 bit // 1) whether the entry already has this position
// generation 5 bit // 2) a copy of the prior data (if any) (may be inconsistent due to read races)
// pv node 1 bit // 3) a writer object to this entry
// bound type 2 bit // The copied data and the writer are separated to maintain clear boundaries between local vs global objects.
// move 16 bit
// value 16 bit
// eval value 16 bit
struct TTEntry {
Move move() const { return Move(move16); }
Value value() const { return Value(value16); }
Value eval() const { return Value(eval16); }
Depth depth() const { return Depth(depth8 + DEPTH_OFFSET); }
bool is_pv() const { return bool(genBound8 & 0x4); }
Bound bound() const { return Bound(genBound8 & 0x3); }
void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
// The returned age is a multiple of TranspositionTable::GENERATION_DELTA
uint8_t relative_age(const uint8_t generation8) const;
private: // A copy of the data already in the entry (possibly collided). `probe` may be racy, resulting in inconsistent data.
friend class TranspositionTable; struct TTData {
Move move;
uint16_t key16; Value value, eval;
uint8_t depth8; Depth depth;
uint8_t genBound8; Bound bound;
Move move16; bool is_pv;
int16_t value16; };
int16_t eval16;
// This is used to make racy writes to the global TT.
struct TTWriter {
public:
void write(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
private:
friend class TranspositionTable;
TTEntry* entry;
TTWriter(TTEntry* tte);
}; };
// A TranspositionTable is an array of Cluster, of size clusterCount. Each
// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry
// contains information on exactly one position. The size of a Cluster should
// divide the size of a cache line for best performance, as the cacheline is
// prefetched when possible.
class TranspositionTable { class TranspositionTable {
static constexpr int ClusterSize = 3;
struct Cluster {
TTEntry entry[ClusterSize];
char padding[2]; // Pad to 32 bytes
};
static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size");
// Constants used to refresh the hash table periodically
// We have 8 bits available where the lowest 3 bits are
// reserved for other things.
static constexpr unsigned GENERATION_BITS = 3;
// increment for generation field
static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS);
// cycle length
static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA;
// mask to pull out generation number
static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;
public: public:
~TranspositionTable() { aligned_large_pages_free(table); } ~TranspositionTable() { aligned_large_pages_free(table); }
void new_search() { void resize(size_t mbSize, ThreadPool& threads); // Set TT size
// increment by delta to keep lower bits as is void clear(ThreadPool& threads); // Re-initialize memory, multithreaded
generation8 += GENERATION_DELTA; int hashfull(int maxAge = 0)
} const; // Approximate what fraction of entries (permille) have been written to during this root search
TTEntry* probe(const Key key, bool& found) const; void
int hashfull() const; new_search(); // This must be called at the beginning of each root search to track entry aging
void resize(size_t mbSize, int threadCount); uint8_t generation() const; // The current age, used when writing new data to the TT
void clear(size_t threadCount); std::tuple<bool, TTData, TTWriter>
probe(const Key key) const; // The main method, whose retvals separate local vs global objects
TTEntry* first_entry(const Key key) const { TTEntry* first_entry(const Key key)
return &table[mul_hi64(key, clusterCount)].entry[0]; const; // This is the hash function; its only external use is memory prefetching.
}
uint8_t generation() const { return generation8; }
private: private:
friend struct TTEntry; friend struct TTEntry;
size_t clusterCount; size_t clusterCount;
Cluster* table = nullptr; Cluster* table = nullptr;
uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8
uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8
}; };
} // namespace Stockfish } // namespace Stockfish


@ -21,6 +21,7 @@
#include <algorithm> #include <algorithm>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <optional>
#include <sstream> #include <sstream>
#include <string> #include <string>
@ -33,19 +34,19 @@ namespace Stockfish {
bool Tune::update_on_last; bool Tune::update_on_last;
const Option* LastOption = nullptr; const Option* LastOption = nullptr;
OptionsMap* Tune::options; OptionsMap* Tune::options;
namespace { namespace {
std::map<std::string, int> TuneResults; std::map<std::string, int> TuneResults;
void on_tune(const Option& o) { std::optional<std::string> on_tune(const Option& o) {
if (!Tune::update_on_last || LastOption == &o) if (!Tune::update_on_last || LastOption == &o)
Tune::read_options(); Tune::read_options();
return std::nullopt;
}
} }
void Tune::make_option(OptionsMap* opts, const string& n, int v, const SetRange& r) {
void make_option(OptionsMap* options, const string& n, int v, const SetRange& r) {
// Do not generate option when there is nothing to tune (ie. min = max) // Do not generate option when there is nothing to tune (ie. min = max)
if (r(v).first == r(v).second) if (r(v).first == r(v).second)
@ -54,15 +55,17 @@ void make_option(OptionsMap* options, const string& n, int v, const SetRange& r)
if (TuneResults.count(n)) if (TuneResults.count(n))
v = TuneResults[n]; v = TuneResults[n];
(*options)[n] << Option(v, r(v).first, r(v).second, on_tune); (*opts)[n] << Option(v, r(v).first, r(v).second, on_tune);
LastOption = &((*options)[n]); LastOption = &((*opts)[n]);
// Print formatted parameters, ready to be copy-pasted in Fishtest // Print formatted parameters, ready to be copy-pasted in Fishtest
std::cout << n << "," << v << "," << r(v).first << "," << r(v).second << "," std::cout << n << "," //
<< (r(v).second - r(v).first) / 20.0 << "," << v << "," //
<< r(v).first << "," //
<< r(v).second << "," //
<< (r(v).second - r(v).first) / 20.0 << "," //
<< "0.0020" << std::endl; << "0.0020" << std::endl;
} }
}
string Tune::next(string& names, bool pop) { string Tune::next(string& names, bool pop) {
@ -118,7 +121,6 @@ void Tune::Entry<Tune::PostUpdate>::read_option() {
namespace Stockfish { namespace Stockfish {
void Tune::read_results() { /* ...insert your values here... */ void Tune::read_results() { /* ...insert your values here... */ }
}
} // namespace Stockfish } // namespace Stockfish


@ -145,6 +145,8 @@ class Tune {
return add(value, (next(names), std::move(names)), args...); return add(value, (next(names), std::move(names)), args...);
} }
static void make_option(OptionsMap* options, const std::string& n, int v, const SetRange& r);
std::vector<std::unique_ptr<EntryBase>> list; std::vector<std::unique_ptr<EntryBase>> list;
public: public:


@ -137,9 +137,9 @@ enum Bound {
BOUND_EXACT = BOUND_UPPER | BOUND_LOWER BOUND_EXACT = BOUND_UPPER | BOUND_LOWER
}; };
// Value is used as an alias for int16_t, this is done to differentiate between // Value is used as an alias for int, this is done to differentiate between a search
// a search value and any other integer value. The values used in search are always // value and any other integer value. The values used in search are always supposed
// supposed to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range. // to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range.
using Value = int; using Value = int;
constexpr Value VALUE_ZERO = 0; constexpr Value VALUE_ZERO = 0;
@ -155,6 +155,21 @@ constexpr Value VALUE_TB = VALUE_MATE_IN_MAX_PLY - 1;
constexpr Value VALUE_TB_WIN_IN_MAX_PLY = VALUE_TB - MAX_PLY; constexpr Value VALUE_TB_WIN_IN_MAX_PLY = VALUE_TB - MAX_PLY;
constexpr Value VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY; constexpr Value VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY;
constexpr bool is_valid(Value value) { return value != VALUE_NONE; }
constexpr bool is_win(Value value) {
assert(is_valid(value));
return value >= VALUE_TB_WIN_IN_MAX_PLY;
}
constexpr bool is_loss(Value value) {
assert(is_valid(value));
return value <= VALUE_TB_LOSS_IN_MAX_PLY;
}
constexpr bool is_decisive(Value value) { return is_win(value) || is_loss(value); }
// In the code, we make the assumption that these values // In the code, we make the assumption that these values
// are such that non_pawn_material() can be used to uniquely // are such that non_pawn_material() can be used to uniquely
// identify the material on the board. // identify the material on the board.
@ -187,12 +202,21 @@ constexpr Value PieceValue[PIECE_NB] = {
using Depth = int; using Depth = int;
enum : int { enum : int {
DEPTH_QS_CHECKS = 0, // The following DEPTH_ constants are used for transposition table entries
DEPTH_QS_NO_CHECKS = -1, // and quiescence search move generation stages. In regular search, the
// depth stored in the transposition table is literal: the search depth
DEPTH_NONE = -6, // (effort) used to make the corresponding transposition table value. In
// quiescence search, however, the transposition table entries only store
DEPTH_OFFSET = -7 // value used only for TT entry occupancy check // the current quiescence move generation stage (which should thus compare
// lower than any regular search depth).
DEPTH_QS = 0,
// For transposition table entries where no searching at all was done
// (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus
// compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET
// is used only for the transposition table entry occupancy check (see tt.cpp),
// and should thus be lower than DEPTH_UNSEARCHED.
DEPTH_UNSEARCHED = -2,
DEPTH_ENTRY_OFFSET = -3
}; };
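A small illustration of how these constants interact with the transposition-table occupancy check (the numbers follow from TTEntry::save() storing uint8_t(d - DEPTH_ENTRY_OFFSET) and is_occupied() testing bool(depth8), as in tt.cpp above):

static_assert(DEPTH_ENTRY_OFFSET < DEPTH_UNSEARCHED && DEPTH_UNSEARCHED < DEPTH_QS,
              "TT-only depths must compare below any searched depth");

// Even an "unsearched" entry is stored with
//   depth8 == DEPTH_UNSEARCHED - DEPTH_ENTRY_OFFSET == 1,
// so it still reads as occupied, while a zero-initialized slot keeps
// depth8 == 0 and reads as empty.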
// clang-format off // clang-format off
@ -357,9 +381,10 @@ enum MoveType {
// bit 14-15: special move flag: promotion (1), en passant (2), castling (3) // bit 14-15: special move flag: promotion (1), en passant (2), castling (3)
// NOTE: en passant bit is set only when a pawn can be captured // NOTE: en passant bit is set only when a pawn can be captured
// //
// Special cases are Move::none() and Move::null(). We can sneak these in because in // Special cases are Move::none() and Move::null(). We can sneak these in because
// any normal move destination square is always different from origin square // in any normal move the destination square and origin square are always different,
// while Move::none() and Move::null() have the same origin and destination square. // but Move::none() and Move::null() have the same origin and destination square.
class Move { class Move {
public: public:
Move() = default; Move() = default;


@ -22,6 +22,7 @@
#include <cctype> #include <cctype>
#include <cmath> #include <cmath>
#include <cstdint> #include <cstdint>
#include <iterator>
#include <optional> #include <optional>
#include <sstream> #include <sstream>
#include <string_view> #include <string_view>
@ -30,20 +31,19 @@
#include "benchmark.h" #include "benchmark.h"
#include "engine.h" #include "engine.h"
#include "evaluate.h" #include "memory.h"
#include "movegen.h" #include "movegen.h"
#include "position.h" #include "position.h"
#include "score.h" #include "score.h"
#include "search.h" #include "search.h"
#include "syzygy/tbprobe.h"
#include "types.h" #include "types.h"
#include "ucioption.h" #include "ucioption.h"
namespace Stockfish { namespace Stockfish {
constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; constexpr auto BenchmarkCommand = "speedtest";
constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;
constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
template<typename... Ts> template<typename... Ts>
struct overload: Ts... { struct overload: Ts... {
using Ts::operator()...; using Ts::operator()...;
@ -52,50 +52,40 @@ struct overload: Ts... {
template<typename... Ts> template<typename... Ts>
overload(Ts...) -> overload<Ts...>; overload(Ts...) -> overload<Ts...>;
void UCIEngine::print_info_string(std::string_view str) {
sync_cout_start();
for (auto& line : split(str, "\n"))
{
if (!is_whitespace(line))
{
std::cout << "info string " << line << '\n';
}
}
sync_cout_end();
}
UCIEngine::UCIEngine(int argc, char** argv) : UCIEngine::UCIEngine(int argc, char** argv) :
engine(argv[0]), engine(argv[0]),
cli(argc, argv) { cli(argc, argv) {
auto& options = engine.get_options(); engine.get_options().add_info_listener([](const std::optional<std::string>& str) {
if (str.has_value())
options["Debug Log File"] << Option("", [](const Option& o) { start_logger(o); }); print_info_string(*str);
});
options["Threads"] << Option(1, 1, 1024, [this](const Option&) { engine.resize_threads(); });
options["Hash"] << Option(16, 1, MaxHashMB, [this](const Option& o) { engine.set_tt_size(o); });
options["Clear Hash"] << Option([this](const Option&) { engine.search_clear(); });
options["Ponder"] << Option(false);
options["MultiPV"] << Option(1, 1, MAX_MOVES);
options["Skill Level"] << Option(20, 0, 20);
options["Move Overhead"] << Option(10, 0, 5000);
options["nodestime"] << Option(0, 0, 10000);
options["UCI_Chess960"] << Option(false);
options["UCI_LimitStrength"] << Option(false);
options["UCI_Elo"] << Option(1320, 1320, 3190);
options["UCI_ShowWDL"] << Option(false);
options["SyzygyPath"] << Option("<empty>", [](const Option& o) { Tablebases::init(o); });
options["SyzygyProbeDepth"] << Option(1, 1, 100);
options["Syzygy50MoveRule"] << Option(true);
options["SyzygyProbeLimit"] << Option(7, 0, 7);
options["EvalFile"] << Option(EvalFileDefaultNameBig,
[this](const Option& o) { engine.load_big_network(o); });
options["EvalFileSmall"] << Option(EvalFileDefaultNameSmall,
[this](const Option& o) { engine.load_small_network(o); });
init_search_update_listeners();
}
void UCIEngine::init_search_update_listeners() {
engine.set_on_iter([](const auto& i) { on_iter(i); }); engine.set_on_iter([](const auto& i) { on_iter(i); });
engine.set_on_update_no_moves([](const auto& i) { on_update_no_moves(i); }); engine.set_on_update_no_moves([](const auto& i) { on_update_no_moves(i); });
engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); engine.set_on_update_full(
[this](const auto& i) { on_update_full(i, engine.get_options()["UCI_ShowWDL"]); });
engine.set_on_bestmove([](const auto& bm, const auto& p) { on_bestmove(bm, p); }); engine.set_on_bestmove([](const auto& bm, const auto& p) { on_bestmove(bm, p); });
engine.set_on_verify_networks([](const auto& s) { print_info_string(s); });
engine.load_networks();
engine.resize_threads();
engine.search_clear(); // After threads are up
} }
void UCIEngine::loop() { void UCIEngine::loop() {
std::string token, cmd; std::string token, cmd;
for (int i = 1; i < cli.argc; ++i) for (int i = 1; i < cli.argc; ++i)
@ -123,13 +113,22 @@ void UCIEngine::loop() {
engine.set_ponderhit(false); engine.set_ponderhit(false);
else if (token == "uci") else if (token == "uci")
{
sync_cout << "id name " << engine_info(true) << "\n" sync_cout << "id name " << engine_info(true) << "\n"
<< engine.get_options() << "\nuciok" << sync_endl; << engine.get_options() << sync_endl;
sync_cout << "uciok" << sync_endl;
}
else if (token == "setoption") else if (token == "setoption")
setoption(is); setoption(is);
else if (token == "go") else if (token == "go")
{
// send info strings after the go command is sent for old GUIs and python-chess
print_info_string(engine.numa_config_information_as_string());
print_info_string(engine.thread_allocation_information_as_string());
go(is); go(is);
}
else if (token == "position") else if (token == "position")
position(is); position(is);
else if (token == "ucinewgame") else if (token == "ucinewgame")
@ -143,6 +142,8 @@ void UCIEngine::loop() {
engine.flip(); engine.flip();
else if (token == "bench") else if (token == "bench")
bench(is); bench(is);
else if (token == BenchmarkCommand)
benchmark(is);
else if (token == "d") else if (token == "d")
sync_cout << engine.visualize() << sync_endl; sync_cout << engine.visualize() << sync_endl;
else if (token == "eval") else if (token == "eval")
@ -258,7 +259,7 @@ void UCIEngine::bench(std::istream& args) {
Search::LimitsType limits = parse_limits(is); Search::LimitsType limits = parse_limits(is);
if (limits.perft) if (limits.perft)
nodes = perft(limits); nodesSearched = perft(limits);
else else
{ {
engine.go(limits); engine.go(limits);
@ -286,14 +287,178 @@ void UCIEngine::bench(std::istream& args) {
dbg_print(); dbg_print();
std::cerr << "\n===========================" std::cerr << "\n===========================" //
<< "\nTotal time (ms) : " << elapsed << "\nNodes searched : " << nodes << "\nTotal time (ms) : " << elapsed //
<< "\nNodes searched : " << nodes //
<< "\nNodes/second : " << 1000 * nodes / elapsed << std::endl; << "\nNodes/second : " << 1000 * nodes / elapsed << std::endl;
// reset callback, to not capture a dangling reference to nodesSearched // reset callback, to not capture a dangling reference to nodesSearched
engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); });
} }
void UCIEngine::benchmark(std::istream& args) {
// Probably not very important for a test this long, but include for completeness and sanity.
static constexpr int NUM_WARMUP_POSITIONS = 3;
std::string token;
uint64_t nodes = 0, cnt = 1;
uint64_t nodesSearched = 0;
engine.set_on_update_full([&](const Engine::InfoFull& i) { nodesSearched = i.nodes; });
engine.set_on_iter([](const auto&) {});
engine.set_on_update_no_moves([](const auto&) {});
engine.set_on_bestmove([](const auto&, const auto&) {});
engine.set_on_verify_networks([](const auto&) {});
Benchmark::BenchmarkSetup setup = Benchmark::setup_benchmark(args);
const int numGoCommands = count_if(setup.commands.begin(), setup.commands.end(),
[](const std::string& s) { return s.find("go ") == 0; });
TimePoint totalTime = 0;
// Set options once at the start.
auto ss = std::istringstream("name Threads value " + std::to_string(setup.threads));
setoption(ss);
ss = std::istringstream("name Hash value " + std::to_string(setup.ttSize));
setoption(ss);
ss = std::istringstream("name UCI_Chess960 value false");
setoption(ss);
// Warmup
for (const auto& cmd : setup.commands)
{
std::istringstream is(cmd);
is >> std::skipws >> token;
if (token == "go")
{
// One new line is produced by the search, so omit it here
std::cerr << "\rWarmup position " << cnt++ << '/' << NUM_WARMUP_POSITIONS;
Search::LimitsType limits = parse_limits(is);
TimePoint elapsed = now();
// Run with silenced network verification
engine.go(limits);
engine.wait_for_search_finished();
totalTime += now() - elapsed;
nodes += nodesSearched;
nodesSearched = 0;
}
else if (token == "position")
position(is);
else if (token == "ucinewgame")
{
engine.search_clear(); // search_clear may take a while
}
if (cnt > NUM_WARMUP_POSITIONS)
break;
}
std::cerr << "\n";
cnt = 1;
nodes = 0;
int numHashfullReadings = 0;
constexpr int hashfullAges[] = {0, 999}; // Only normal hashfull and touched hash.
int totalHashfull[std::size(hashfullAges)] = {0};
int maxHashfull[std::size(hashfullAges)] = {0};
auto updateHashfullReadings = [&]() {
numHashfullReadings += 1;
for (int i = 0; i < static_cast<int>(std::size(hashfullAges)); ++i)
{
const int hashfull = engine.get_hashfull(hashfullAges[i]);
maxHashfull[i] = std::max(maxHashfull[i], hashfull);
totalHashfull[i] += hashfull;
}
};
engine.search_clear(); // search_clear may take a while
for (const auto& cmd : setup.commands)
{
std::istringstream is(cmd);
is >> std::skipws >> token;
if (token == "go")
{
// One new line is produced by the search, so omit it here
std::cerr << "\rPosition " << cnt++ << '/' << numGoCommands;
Search::LimitsType limits = parse_limits(is);
TimePoint elapsed = now();
// Run with silenced network verification
engine.go(limits);
engine.wait_for_search_finished();
totalTime += now() - elapsed;
updateHashfullReadings();
nodes += nodesSearched;
nodesSearched = 0;
}
else if (token == "position")
position(is);
else if (token == "ucinewgame")
{
engine.search_clear(); // search_clear may take a while
}
}
totalTime = std::max<TimePoint>(totalTime, 1); // Ensure positivity to avoid a 'divide by zero'
dbg_print();
std::cerr << "\n";
static_assert(
std::size(hashfullAges) == 2 && hashfullAges[0] == 0 && hashfullAges[1] == 999,
"Hardcoded for display. Would complicate the code needlessly in the current state.");
std::string threadBinding = engine.thread_binding_information_as_string();
if (threadBinding.empty())
threadBinding = "none";
// clang-format off
std::cerr << "==========================="
<< "\nVersion : "
<< engine_version_info()
// "\nCompiled by : "
<< compiler_info()
<< "Large pages : " << (has_large_pages() ? "yes" : "no")
<< "\nUser invocation : " << BenchmarkCommand << " "
<< setup.originalInvocation << "\nFilled invocation : " << BenchmarkCommand
<< " " << setup.filledInvocation
<< "\nAvailable processors : " << engine.get_numa_config_as_string()
<< "\nThread count : " << setup.threads
<< "\nThread binding : " << threadBinding
<< "\nTT size [MiB] : " << setup.ttSize
<< "\nHash max, avg [per mille] : "
<< "\n single search : " << maxHashfull[0] << ", "
<< totalHashfull[0] / numHashfullReadings
<< "\n single game : " << maxHashfull[1] << ", "
<< totalHashfull[1] / numHashfullReadings
<< "\nTotal nodes searched : " << nodes
<< "\nTotal search time [s] : " << totalTime / 1000.0
<< "\nNodes/second : " << 1000 * nodes / totalTime << std::endl;
// clang-format on
init_search_update_listeners();
}
void UCIEngine::setoption(std::istringstream& is) { void UCIEngine::setoption(std::istringstream& is) {
engine.wait_for_search_finished(); engine.wait_for_search_finished();
@ -344,12 +509,12 @@ WinRateParams win_rate_params(const Position& pos) {
int material = pos.count<PAWN>() + 3 * pos.count<KNIGHT>() + 3 * pos.count<BISHOP>() int material = pos.count<PAWN>() + 3 * pos.count<KNIGHT>() + 3 * pos.count<BISHOP>()
+ 5 * pos.count<ROOK>() + 9 * pos.count<QUEEN>(); + 5 * pos.count<ROOK>() + 9 * pos.count<QUEEN>();
// The fitted model only uses data for material counts in [10, 78], and is anchored at count 58. // The fitted model only uses data for material counts in [17, 78], and is anchored at count 58.
double m = std::clamp(material, 10, 78) / 58.0; double m = std::clamp(material, 17, 78) / 58.0;
// Return a = p_a(material) and b = p_b(material), see github.com/official-stockfish/WDL_model // Return a = p_a(material) and b = p_b(material), see github.com/official-stockfish/WDL_model
constexpr double as[] = {-150.77043883, 394.96159472, -321.73403766, 406.15850091}; constexpr double as[] = {-37.45051876, 121.19101539, -132.78783573, 420.70576692};
constexpr double bs[] = {62.33245393, -91.02264855, 45.88486850, 51.63461272}; constexpr double bs[] = {90.26261072, -137.26549898, 71.10130540, 51.35259597};
double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
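As a rough sanity check at the anchor point m = 1 (material count 58), the Horner forms reduce to coefficient sums: a ≈ -37.45 + 121.19 - 132.79 + 420.71 ≈ 371.7 and b ≈ 90.26 - 137.27 + 71.10 + 51.35 ≈ 75.5; in the logistic WDL model these parameters feed (not shown in this hunk), an internal value of about +372 then corresponds to a 50% win probability at that material count.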
@ -390,8 +555,8 @@ std::string UCIEngine::format_score(const Score& s) {
// without treatment of mate and similar special scores. // without treatment of mate and similar special scores.
int UCIEngine::to_cp(Value v, const Position& pos) { int UCIEngine::to_cp(Value v, const Position& pos) {
// In general, the score can be defined via the the WDL as // In general, the score can be defined via the WDL as
// (log(1/L - 1) - log(1/W - 1)) / ((log(1/L - 1) + log(1/W - 1)) // (log(1/L - 1) - log(1/W - 1)) / (log(1/L - 1) + log(1/W - 1)).
// Based on our win_rate_model, this simply yields v / a. // Based on our win_rate_model, this simply yields v / a.
auto [a, b] = win_rate_params(pos); auto [a, b] = win_rate_params(pos);


@ -19,10 +19,10 @@
#ifndef UCI_H_INCLUDED #ifndef UCI_H_INCLUDED
#define UCI_H_INCLUDED #define UCI_H_INCLUDED
#include <cstdint>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <cstdint>
#include "engine.h" #include "engine.h"
#include "misc.h" #include "misc.h"
@ -58,8 +58,11 @@ class UCIEngine {
Engine engine; Engine engine;
CommandLine cli; CommandLine cli;
static void print_info_string(std::string_view str);
void go(std::istringstream& is); void go(std::istringstream& is);
void bench(std::istream& args); void bench(std::istream& args);
void benchmark(std::istream& args);
void position(std::istringstream& is); void position(std::istringstream& is);
void setoption(std::istringstream& is); void setoption(std::istringstream& is);
std::uint64_t perft(const Search::LimitsType&); std::uint64_t perft(const Search::LimitsType&);
@ -68,6 +71,8 @@ class UCIEngine {
static void on_update_full(const Engine::InfoFull& info, bool showWDL); static void on_update_full(const Engine::InfoFull& info, bool showWDL);
static void on_iter(const Engine::InfoIter& info); static void on_iter(const Engine::InfoIter& info);
static void on_bestmove(std::string_view bestmove, std::string_view ponder); static void on_bestmove(std::string_view bestmove, std::string_view ponder);
void init_search_update_listeners();
}; };
} // namespace Stockfish } // namespace Stockfish


@ -36,6 +36,8 @@ bool CaseInsensitiveLess::operator()(const std::string& s1, const std::string& s
[](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); }); [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); });
} }
void OptionsMap::add_info_listener(InfoListener&& message_func) { info = std::move(message_func); }
void OptionsMap::setoption(std::istringstream& is) { void OptionsMap::setoption(std::istringstream& is) {
std::string token, name, value; std::string token, name, value;
@ -57,13 +59,20 @@ void OptionsMap::setoption(std::istringstream& is) {
Option OptionsMap::operator[](const std::string& name) const { Option OptionsMap::operator[](const std::string& name) const {
auto it = options_map.find(name); auto it = options_map.find(name);
return it != options_map.end() ? it->second : Option(); return it != options_map.end() ? it->second : Option(this);
} }
Option& OptionsMap::operator[](const std::string& name) { return options_map[name]; } Option& OptionsMap::operator[](const std::string& name) {
if (!options_map.count(name))
options_map[name] = Option(this);
return options_map[name];
}
std::size_t OptionsMap::count(const std::string& name) const { return options_map.count(name); } std::size_t OptionsMap::count(const std::string& name) const { return options_map.count(name); }
Option::Option(const OptionsMap* map) :
parent(map) {}
Option::Option(const char* v, OnChange f) : Option::Option(const char* v, OnChange f) :
type("string"), type("string"),
min(0), min(0),
@ -118,6 +127,8 @@ bool Option::operator==(const char* s) const {
return !CaseInsensitiveLess()(currentValue, s) && !CaseInsensitiveLess()(s, currentValue); return !CaseInsensitiveLess()(currentValue, s) && !CaseInsensitiveLess()(s, currentValue);
} }
bool Option::operator!=(const char* s) const { return !(*this == s); }
// Inits options and assigns idx in the correct printing order // Inits options and assigns idx in the correct printing order
@ -125,10 +136,12 @@ void Option::operator<<(const Option& o) {
static size_t insert_order = 0; static size_t insert_order = 0;
*this = o; auto p = this->parent;
idx = insert_order++; *this = o;
}
this->parent = p;
idx = insert_order++;
}
// Updates currentValue and triggers on_change() action. It's up to // Updates currentValue and triggers on_change() action. It's up to
// the GUI to check for option's limits, but we could receive the new value // the GUI to check for option's limits, but we could receive the new value
@ -153,11 +166,18 @@ Option& Option::operator=(const std::string& v) {
return *this; return *this;
} }
if (type != "button") if (type == "string")
currentValue = v == "<empty>" ? "" : v;
else if (type != "button")
currentValue = v; currentValue = v;
if (on_change) if (on_change)
on_change(*this); {
const auto ret = on_change(*this);
if (ret && parent != nullptr && parent->info != nullptr)
parent->info(ret);
}
return *this; return *this;
} }
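A hedged sketch of how the new plumbing fits together end to end (the function and the chosen option are placeholders; in the real code Engine owns the OptionsMap and UCIEngine installs the listener):

#include <iostream>
#include <optional>
#include <string>

#include "ucioption.h"

// Sketch only: route on_change messages to stdout as UCI info strings.
void wire_options_example(Stockfish::OptionsMap& options) {
    // Whatever an on_change callback returns is forwarded here.
    options.add_info_listener([](const std::optional<std::string>& msg) {
        if (msg)
            std::cout << "info string " << *msg << std::endl;
    });

    // Assigning to a registered option triggers its on_change callback; any
    // message that callback returns now flows through the listener above.
    options["Debug Log File"] = "uci_log.txt";
}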
@ -170,10 +190,16 @@ std::ostream& operator<<(std::ostream& os, const OptionsMap& om) {
const Option& o = it.second; const Option& o = it.second;
os << "\noption name " << it.first << " type " << o.type; os << "\noption name " << it.first << " type " << o.type;
if (o.type == "string" || o.type == "check" || o.type == "combo") if (o.type == "check" || o.type == "combo")
os << " default " << o.defaultValue; os << " default " << o.defaultValue;
if (o.type == "spin") else if (o.type == "string")
{
std::string defaultValue = o.defaultValue.empty() ? "<empty>" : o.defaultValue;
os << " default " << defaultValue;
}
else if (o.type == "spin")
os << " default " << int(stof(o.defaultValue)) << " min " << o.min << " max " os << " default " << int(stof(o.defaultValue)) << " min " << o.min << " max "
<< o.max; << o.max;


@ -23,6 +23,7 @@
#include <functional> #include <functional>
#include <iosfwd> #include <iosfwd>
#include <map> #include <map>
#include <optional>
#include <string> #include <string>
namespace Stockfish { namespace Stockfish {
@ -31,31 +32,14 @@ struct CaseInsensitiveLess {
bool operator()(const std::string&, const std::string&) const; bool operator()(const std::string&, const std::string&) const;
}; };
class Option; class OptionsMap;
class OptionsMap {
public:
void setoption(std::istringstream&);
friend std::ostream& operator<<(std::ostream&, const OptionsMap&);
Option operator[](const std::string&) const;
Option& operator[](const std::string&);
std::size_t count(const std::string&) const;
private:
// The options container is defined as a std::map
using OptionsStore = std::map<std::string, Option, CaseInsensitiveLess>;
OptionsStore options_map;
};
// The Option class implements each option as specified by the UCI protocol // The Option class implements each option as specified by the UCI protocol
class Option { class Option {
public: public:
using OnChange = std::function<void(const Option&)>; using OnChange = std::function<std::optional<std::string>(const Option&)>;
Option(const OptionsMap*);
Option(OnChange = nullptr); Option(OnChange = nullptr);
Option(bool v, OnChange = nullptr); Option(bool v, OnChange = nullptr);
Option(const char* v, OnChange = nullptr); Option(const char* v, OnChange = nullptr);
@ -63,18 +47,57 @@ class Option {
Option(const char* v, const char* cur, OnChange = nullptr); Option(const char* v, const char* cur, OnChange = nullptr);
Option& operator=(const std::string&); Option& operator=(const std::string&);
void operator<<(const Option&);
operator int() const; operator int() const;
operator std::string() const; operator std::string() const;
bool operator==(const char*) const; bool operator==(const char*) const;
bool operator!=(const char*) const;
friend std::ostream& operator<<(std::ostream&, const OptionsMap&); friend std::ostream& operator<<(std::ostream&, const OptionsMap&);
private: private:
std::string defaultValue, currentValue, type; friend class OptionsMap;
int min, max; friend class Engine;
size_t idx; friend class Tune;
OnChange on_change;
void operator<<(const Option&);
std::string defaultValue, currentValue, type;
int min, max;
size_t idx;
OnChange on_change;
const OptionsMap* parent = nullptr;
};
class OptionsMap {
public:
using InfoListener = std::function<void(std::optional<std::string>)>;
OptionsMap() = default;
OptionsMap(const OptionsMap&) = delete;
OptionsMap(OptionsMap&&) = delete;
OptionsMap& operator=(const OptionsMap&) = delete;
OptionsMap& operator=(OptionsMap&&) = delete;
void add_info_listener(InfoListener&&);
void setoption(std::istringstream&);
Option operator[](const std::string&) const;
Option& operator[](const std::string&);
std::size_t count(const std::string&) const;
private:
friend class Engine;
friend class Option;
friend std::ostream& operator<<(std::ostream&, const OptionsMap&);
// The options container is defined as a std::map
using OptionsStore = std::map<std::string, Option, CaseInsensitiveLess>;
OptionsStore options_map;
InfoListener info;
}; };
} }

tests/instrumented.py Normal file

@ -0,0 +1,520 @@
import argparse
import re
import sys
import subprocess
import pathlib
import os
from testing import (
EPD,
TSAN,
Stockfish as Engine,
MiniTestFramework,
OrderedClassMembers,
Valgrind,
Syzygy,
)
PATH = pathlib.Path(__file__).parent.resolve()
CWD = os.getcwd()
def get_prefix():
if args.valgrind:
return Valgrind.get_valgrind_command()
if args.valgrind_thread:
return Valgrind.get_valgrind_thread_command()
return []
def get_threads():
if args.valgrind_thread or args.sanitizer_thread:
return 2
return 1
def get_path():
return os.path.abspath(os.path.join(CWD, args.stockfish_path))
def postfix_check(output):
if args.sanitizer_undefined:
for idx, line in enumerate(output):
if "runtime error:" in line:
# print next possible 50 lines
for i in range(50):
debug_idx = idx + i
if debug_idx < len(output):
print(output[debug_idx])
return False
if args.sanitizer_thread:
for idx, line in enumerate(output):
if "WARNING: ThreadSanitizer:" in line:
# print next possible 50 lines
for i in range(50):
debug_idx = idx + i
if debug_idx < len(output):
print(output[debug_idx])
return False
return True
def Stockfish(*args, **kwargs):
return Engine(get_prefix(), get_path(), *args, **kwargs)
class TestCLI(metaclass=OrderedClassMembers):
def beforeAll(self):
pass
def afterAll(self):
pass
def beforeEach(self):
self.stockfish = None
def afterEach(self):
assert postfix_check(self.stockfish.get_output()) == True
self.stockfish.clear_output()
def test_eval(self):
self.stockfish = Stockfish("eval".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_go_nodes_1000(self):
self.stockfish = Stockfish("go nodes 1000".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_go_depth_10(self):
self.stockfish = Stockfish("go depth 10".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_go_perft_4(self):
self.stockfish = Stockfish("go perft 4".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_go_movetime_1000(self):
self.stockfish = Stockfish("go movetime 1000".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_go_wtime_8000_btime_8000_winc_500_binc_500(self):
self.stockfish = Stockfish(
"go wtime 8000 btime 8000 winc 500 binc 500".split(" "),
True,
)
assert self.stockfish.process.returncode == 0
def test_go_wtime_1000_btime_1000_winc_0_binc_0(self):
self.stockfish = Stockfish(
"go wtime 1000 btime 1000 winc 0 binc 0".split(" "),
True,
)
assert self.stockfish.process.returncode == 0
def test_go_wtime_1000_btime_1000_winc_0_binc_0_movestogo_5(self):
self.stockfish = Stockfish(
"go wtime 1000 btime 1000 winc 0 binc 0 movestogo 5".split(" "),
True,
)
assert self.stockfish.process.returncode == 0
def test_go_movetime_200(self):
self.stockfish = Stockfish("go movetime 200".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_go_nodes_20000_searchmoves_e2e4_d2d4(self):
self.stockfish = Stockfish(
"go nodes 20000 searchmoves e2e4 d2d4".split(" "), True
)
assert self.stockfish.process.returncode == 0
def test_bench_128_threads_8_default_depth(self):
self.stockfish = Stockfish(
f"bench 128 {get_threads()} 8 default depth".split(" "),
True,
)
assert self.stockfish.process.returncode == 0
def test_bench_128_threads_3_bench_tmp_epd_depth(self):
self.stockfish = Stockfish(
f"bench 128 {get_threads()} 3 {os.path.join(PATH,'bench_tmp.epd')} depth".split(
" "
),
True,
)
assert self.stockfish.process.returncode == 0
def test_d(self):
self.stockfish = Stockfish("d".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_compiler(self):
self.stockfish = Stockfish("compiler".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_license(self):
self.stockfish = Stockfish("license".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_uci(self):
self.stockfish = Stockfish("uci".split(" "), True)
assert self.stockfish.process.returncode == 0
def test_export_net_verify_nnue(self):
current_path = os.path.abspath(os.getcwd())
self.stockfish = Stockfish(
f"export_net {os.path.join(current_path , 'verify.nnue')}".split(" "), True
)
assert self.stockfish.process.returncode == 0
# verify the generated net equals the base net
def test_network_equals_base(self):
self.stockfish = Stockfish(
["uci"],
True,
)
output = self.stockfish.process.stdout
# find line
for line in output.split("\n"):
if "option name EvalFile type string default" in line:
network = line.split(" ")[-1]
break
# find network file in src dir
network = os.path.join(PATH.parent.resolve(), "src", network)
if not os.path.exists(network):
print(
f"Network file {network} not found, please download the network file over the make command."
)
assert False
diff = subprocess.run(["diff", network, f"verify.nnue"])
assert diff.returncode == 0
class TestInteractive(metaclass=OrderedClassMembers):
def beforeAll(self):
self.stockfish = Stockfish()
def afterAll(self):
self.stockfish.quit()
assert self.stockfish.close() == 0
def afterEach(self):
assert postfix_check(self.stockfish.get_output()) == True
self.stockfish.clear_output()
def test_startup_output(self):
self.stockfish.starts_with("Stockfish")
def test_uci_command(self):
self.stockfish.send_command("uci")
self.stockfish.equals("uciok")
def test_set_threads_option(self):
self.stockfish.send_command(f"setoption name Threads value {get_threads()}")
def test_ucinewgame_and_startpos_nodes_1000(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command("position startpos")
self.stockfish.send_command("go nodes 1000")
self.stockfish.starts_with("bestmove")
def test_ucinewgame_and_startpos_moves(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command("position startpos moves e2e4 e7e6")
self.stockfish.send_command("go nodes 1000")
self.stockfish.starts_with("bestmove")
def test_fen_position_1(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command("position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1")
self.stockfish.send_command("go nodes 1000")
self.stockfish.starts_with("bestmove")
def test_fen_position_2_flip(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command("position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1")
self.stockfish.send_command("flip")
self.stockfish.send_command("go nodes 1000")
self.stockfish.starts_with("bestmove")
def test_depth_5_with_callback(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command("position startpos")
self.stockfish.send_command("go depth 5")
def callback(output):
regex = r"info depth \d+ seldepth \d+ multipv \d+ score cp \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv"
if output.startswith("info depth") and not re.match(regex, output):
assert False
if output.startswith("bestmove"):
return True
return False
self.stockfish.check_output(callback)
def test_ucinewgame_and_go_depth_9(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command("setoption name UCI_ShowWDL value true")
self.stockfish.send_command("position startpos")
self.stockfish.send_command("go depth 9")
depth = 1
def callback(output):
nonlocal depth
regex = rf"info depth {depth} seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv"
if output.startswith("info depth"):
if not re.match(regex, output):
assert False
depth += 1
if output.startswith("bestmove"):
assert depth == 10
return True
return False
self.stockfish.check_output(callback)
def test_clear_hash(self):
self.stockfish.send_command("setoption name Clear Hash")
def test_fen_position_mate_1(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen 5K2/8/2qk4/2nPp3/3r4/6B1/B7/3R4 w - e6"
)
self.stockfish.send_command("go depth 18")
self.stockfish.expect("* score mate 1 * pv d5e6")
self.stockfish.equals("bestmove d5e6")
def test_fen_position_mate_minus_1(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen 2brrb2/8/p7/Q7/1p1kpPp1/1P1pN1K1/3P4/8 b - -"
)
self.stockfish.send_command("go depth 18")
self.stockfish.expect("* score mate -1 *")
self.stockfish.starts_with("bestmove")
def test_fen_position_fixed_node(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen 5K2/8/2P1P1Pk/6pP/3p2P1/1P6/3P4/8 w - - 0 1"
)
self.stockfish.send_command("go nodes 500000")
self.stockfish.starts_with("bestmove")
def test_fen_position_with_mate_go_depth(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -"
)
self.stockfish.send_command("go depth 18 searchmoves c6d7")
self.stockfish.expect("* score mate 2 * pv c6d7 * f7f5")
self.stockfish.starts_with("bestmove")
def test_fen_position_with_mate_go_mate(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -"
)
self.stockfish.send_command("go mate 2 searchmoves c6d7")
self.stockfish.expect("* score mate 2 * pv c6d7 *")
self.stockfish.starts_with("bestmove")
def test_fen_position_with_mate_go_nodes(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -"
)
self.stockfish.send_command("go nodes 500000 searchmoves c6d7")
self.stockfish.expect("* score mate 2 * pv c6d7 * f7f5")
self.stockfish.starts_with("bestmove")
def test_fen_position_depth_27(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen r1b2r1k/pp1p2pp/2p5/2B1q3/8/8/P1PN2PP/R4RK1 w - - 0 18"
)
self.stockfish.send_command("go")
self.stockfish.contains("score mate 1")
self.stockfish.starts_with("bestmove")
def test_fen_position_with_mate_go_depth_and_promotion(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7 f2f1q"
)
self.stockfish.send_command("go depth 18")
self.stockfish.expect("* score mate 1 * pv f7f5")
self.stockfish.starts_with("bestmove f7f5")
def test_fen_position_with_mate_go_depth_and_searchmoves(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -"
)
self.stockfish.send_command("go depth 18 searchmoves c6d7")
self.stockfish.expect("* score mate 2 * pv c6d7 * f7f5")
self.stockfish.starts_with("bestmove c6d7")
def test_fen_position_with_moves_with_mate_go_depth_and_searchmoves(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command(
"position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7"
)
self.stockfish.send_command("go depth 18 searchmoves e3e2")
self.stockfish.expect("* score mate -1 * pv e3e2 f7f5")
self.stockfish.starts_with("bestmove e3e2")
def test_verify_nnue_network(self):
current_path = os.path.abspath(os.getcwd())
Stockfish(
f"export_net {os.path.join(current_path , 'verify.nnue')}".split(" "), True
)
self.stockfish.send_command("setoption name EvalFile value verify.nnue")
self.stockfish.send_command("position startpos")
self.stockfish.send_command("go depth 5")
self.stockfish.starts_with("bestmove")
def test_multipv_setting(self):
self.stockfish.send_command("setoption name MultiPV value 4")
self.stockfish.send_command("position startpos")
self.stockfish.send_command("go depth 5")
self.stockfish.starts_with("bestmove")
def test_fen_position_with_skill_level(self):
self.stockfish.send_command("setoption name Skill Level value 10")
self.stockfish.send_command("position startpos")
self.stockfish.send_command("go depth 5")
self.stockfish.starts_with("bestmove")
self.stockfish.send_command("setoption name Skill Level value 20")
class TestSyzygy(metaclass=OrderedClassMembers):
def beforeAll(self):
self.stockfish = Stockfish()
def afterAll(self):
self.stockfish.quit()
assert self.stockfish.close() == 0
def afterEach(self):
assert postfix_check(self.stockfish.get_output()) == True
self.stockfish.clear_output()
def test_syzygy_setup(self):
self.stockfish.starts_with("Stockfish")
self.stockfish.send_command("uci")
self.stockfish.send_command(
f"setoption name SyzygyPath value {os.path.join(PATH, 'syzygy')}"
)
self.stockfish.expect(
"info string Found 35 WDL and 35 DTZ tablebase files (up to 4-man)."
)
def test_syzygy_bench(self):
self.stockfish.send_command("bench 128 1 8 default depth")
self.stockfish.expect("Nodes searched :*")
def test_syzygy_position(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command("position fen 4k3/PP6/8/8/8/8/8/4K3 w - - 0 1")
self.stockfish.send_command("go depth 5")
def check_output(output):
if "score cp 20000" in output or "score mate" in output:
return True
self.stockfish.check_output(check_output)
self.stockfish.expect("bestmove *")
def test_syzygy_position_2(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command("position fen 8/1P6/2B5/8/4K3/8/6k1/8 w - - 0 1")
self.stockfish.send_command("go depth 5")
def check_output(output):
if "score cp 20000" in output or "score mate" in output:
return True
self.stockfish.check_output(check_output)
self.stockfish.expect("bestmove *")
def test_syzygy_position_3(self):
self.stockfish.send_command("ucinewgame")
self.stockfish.send_command("position fen 8/1P6/2B5/8/4K3/8/6k1/8 b - - 0 1")
self.stockfish.send_command("go depth 5")
def check_output(output):
if "score cp -20000" in output or "score mate" in output:
return True
self.stockfish.check_output(check_output)
self.stockfish.expect("bestmove *")
def parse_args():
parser = argparse.ArgumentParser(description="Run Stockfish with testing options")
parser.add_argument("--valgrind", action="store_true", help="Run valgrind testing")
parser.add_argument(
"--valgrind-thread", action="store_true", help="Run valgrind-thread testing"
)
parser.add_argument(
"--sanitizer-undefined",
action="store_true",
help="Run sanitizer-undefined testing",
)
parser.add_argument(
"--sanitizer-thread", action="store_true", help="Run sanitizer-thread testing"
)
parser.add_argument(
"--none", action="store_true", help="Run without any testing options"
)
parser.add_argument("stockfish_path", type=str, help="Path to Stockfish binary")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
EPD.create_bench_epd()
TSAN.set_tsan_option()
Syzygy.download_syzygy()
framework = MiniTestFramework()
    # Each test suite will be run inside a temporary directory
framework.run([TestCLI, TestInteractive, TestSyzygy])
EPD.delete_bench_epd()
TSAN.unset_tsan_option()
if framework.has_failed():
sys.exit(1)
sys.exit(0)
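The framework collects the test_* methods of each class in definition order and runs every suite in its own temporary directory, so extending coverage only means passing another class to framework.run. A minimal sketch of a hypothetical extra suite (the class name, option value and search limits below are illustrative, not part of this commit):
class TestHashOption(metaclass=OrderedClassMembers):  # hypothetical extra suite
    def beforeAll(self):
        self.stockfish = Stockfish()

    def afterAll(self):
        self.stockfish.quit()
        assert self.stockfish.close() == 0

    def test_set_hash_and_search(self):
        self.stockfish.send_command("setoption name Hash value 64")
        self.stockfish.send_command("position startpos")
        self.stockfish.send_command("go depth 5")
        self.stockfish.starts_with("bestmove")

# framework.run([TestCLI, TestInteractive, TestSyzygy, TestHashOption])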

View file

@ -1,301 +0,0 @@
#!/bin/bash
# check for errors under Valgrind or sanitizers.
error()
{
echo "instrumented testing failed on line $1"
exit 1
}
trap 'error ${LINENO}' ERR
# define suitable post and prefixes for testing options
case $1 in
--valgrind)
echo "valgrind testing started"
prefix=''
exeprefix='valgrind --error-exitcode=42 --errors-for-leak-kinds=all --leak-check=full'
postfix=''
threads="1"
;;
--valgrind-thread)
echo "valgrind-thread testing started"
prefix=''
exeprefix='valgrind --fair-sched=try --error-exitcode=42'
postfix=''
threads="2"
;;
--sanitizer-undefined)
echo "sanitizer-undefined testing started"
prefix='!'
exeprefix=''
postfix='2>&1 | grep -A50 "runtime error:"'
threads="1"
;;
--sanitizer-thread)
echo "sanitizer-thread testing started"
prefix='!'
exeprefix=''
postfix='2>&1 | grep -A50 "WARNING: ThreadSanitizer:"'
threads="2"
cat << EOF > tsan.supp
race:Stockfish::TTEntry::move
race:Stockfish::TTEntry::depth
race:Stockfish::TTEntry::bound
race:Stockfish::TTEntry::save
race:Stockfish::TTEntry::value
race:Stockfish::TTEntry::eval
race:Stockfish::TTEntry::is_pv
race:Stockfish::TranspositionTable::probe
race:Stockfish::TranspositionTable::hashfull
EOF
export TSAN_OPTIONS="suppressions=./tsan.supp"
;;
*)
echo "unknown testing started"
prefix=''
exeprefix=''
postfix=''
threads="1"
;;
esac
cat << EOF > bench_tmp.epd
Rn6/1rbq1bk1/2p2n1p/2Bp1p2/3Pp1pP/1N2P1P1/2Q1NPB1/6K1 w - - 2 26
rnbqkb1r/ppp1pp2/5n1p/3p2p1/P2PP3/5P2/1PP3PP/RNBQKBNR w KQkq - 0 3
3qnrk1/4bp1p/1p2p1pP/p2bN3/1P1P1B2/P2BQ3/5PP1/4R1K1 w - - 9 28
r4rk1/1b2ppbp/pq4pn/2pp1PB1/1p2P3/1P1P1NN1/1PP3PP/R2Q1RK1 w - - 0 13
EOF
# simple command line testing
for args in "eval" \
"go nodes 1000" \
"go depth 10" \
"go perft 4" \
"go movetime 1000" \
"go wtime 8000 btime 8000 winc 500 binc 500" \
"go wtime 1000 btime 1000 winc 0 binc 0" \
"go wtime 1000 btime 1000 winc 0 binc 0" \
"go wtime 1000 btime 1000 winc 0 binc 0 movestogo 5" \
"go movetime 200" \
"go nodes 20000 searchmoves e2e4 d2d4" \
"bench 128 $threads 8 default depth" \
"bench 128 $threads 3 bench_tmp.epd depth" \
"export_net verify.nnue" \
"d" \
"compiler" \
"license" \
"uci"
do
echo "$prefix $exeprefix ./stockfish $args $postfix"
eval "$prefix $exeprefix ./stockfish $args $postfix"
done
# verify the generated net equals the base net
network=`./stockfish uci | grep 'option name EvalFile type string default' | awk '{print $NF}'`
echo "Comparing $network to the written verify.nnue"
diff $network verify.nnue
# more general testing, following an uci protocol exchange
cat << EOF > game.exp
set timeout 240
# to correctly catch eof we need the following line
# expect_before timeout { exit 2 } eof { exit 3 }
expect_before timeout { exit 2 }
spawn $exeprefix ./stockfish
expect "Stockfish"
send "uci\n"
expect "uciok"
# send "setoption name Debug Log File value debug.log\n"
send "setoption name Threads value $threads\n"
send "ucinewgame\n"
send "position startpos\n"
send "go nodes 1000\n"
expect "bestmove"
send "ucinewgame\n"
send "position startpos moves e2e4 e7e6\n"
send "go nodes 1000\n"
expect "bestmove"
send "ucinewgame\n"
send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n"
send "go depth 10\n"
expect "bestmove"
send "ucinewgame\n"
send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n"
send "flip\n"
send "go depth 10\n"
expect "bestmove"
send "ucinewgame\n"
send "position startpos\n"
send "go depth 5\n"
expect -re {info depth \d+ seldepth \d+ multipv \d+ score cp \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect "bestmove"
send "ucinewgame\n"
send "setoption name UCI_ShowWDL value true\n"
send "position startpos\n"
send "go depth 9\n"
expect -re {info depth 1 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect -re {info depth 2 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect -re {info depth 3 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect -re {info depth 4 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect -re {info depth 5 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect -re {info depth 6 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect -re {info depth 7 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect -re {info depth 8 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect -re {info depth 9 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
expect "bestmove"
send "setoption name Clear Hash\n"
send "ucinewgame\n"
send "position fen 5K2/8/2qk4/2nPp3/3r4/6B1/B7/3R4 w - e6\n"
send "go depth 18\n"
expect "score mate 1"
expect "pv d5e6"
expect "bestmove d5e6"
send "ucinewgame\n"
send "position fen 2brrb2/8/p7/Q7/1p1kpPp1/1P1pN1K1/3P4/8 b - -\n"
send "go depth 18\n"
expect "score mate -1"
expect "bestmove"
send "ucinewgame\n"
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n"
send "go depth 18\n"
expect "score mate 2 * pv c6d7 * f7f5"
expect "bestmove c6d7"
send "ucinewgame\n"
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n"
send "go mate 2\n"
expect "score mate 2 * pv c6d7"
expect "bestmove c6d7"
send "ucinewgame\n"
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n"
send "go nodes 10000\n"
expect "score mate 2 * pv c6d7 * f7f5"
expect "bestmove c6d7"
send "ucinewgame\n"
send "position fen 1NR2B2/5p2/5p2/1p1kpp2/1P2rp2/2P1pB2/2P1P1K1/8 b - - \n"
send "go depth 18\n"
expect "score mate -2"
expect "pv d5e6 c8d8"
expect "bestmove d5e6"
send "ucinewgame\n"
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7 f2f1q\n"
send "go depth 18\n"
expect "score mate 1 * pv f7f5"
expect "bestmove f7f5"
send "ucinewgame\n"
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n"
send "go depth 18 searchmoves c6d7\n"
expect "score mate 2 * pv c6d7 * f7f5"
expect "bestmove c6d7"
send "ucinewgame\n"
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7\n"
send "go depth 18 searchmoves e3e2\n"
expect "score mate -1 * pv e3e2 f7f5"
expect "bestmove e3e2"
send "setoption name EvalFile value verify.nnue\n"
send "position startpos\n"
send "go depth 5\n"
expect "bestmove"
send "setoption name MultiPV value 4\n"
send "position startpos\n"
send "go depth 5\n"
expect "bestmove"
send "setoption name Skill Level value 10\n"
send "position startpos\n"
send "go depth 5\n"
expect "bestmove"
send "setoption name Skill Level value 20\n"
send "quit\n"
expect eof
# return error code of the spawned program, useful for Valgrind
lassign [wait] pid spawnid os_error_flag value
exit \$value
EOF
#download TB as needed
if [ ! -d ../tests/syzygy ]; then
curl -sL https://api.github.com/repos/niklasf/python-chess/tarball/9b9aa13f9f36d08aadfabff872882f4ab1494e95 | tar -xzf -
mv niklasf-python-chess-9b9aa13 ../tests/syzygy
fi
cat << EOF > syzygy.exp
set timeout 240
# to correctly catch eof we need the following line
# expect_before timeout { exit 2 } eof { exit 3 }
expect_before timeout { exit 2 }
spawn $exeprefix ./stockfish
expect "Stockfish"
send "uci\n"
send "setoption name SyzygyPath value ../tests/syzygy/\n"
expect "info string Found 35 tablebases"
send "bench 128 1 8 default depth\n"
expect "Nodes searched :"
send "ucinewgame\n"
send "position fen 4k3/PP6/8/8/8/8/8/4K3 w - - 0 1\n"
send "go depth 5\n"
expect -re {score cp 20000|score mate}
expect "bestmove"
send "ucinewgame\n"
send "position fen 8/1P6/2B5/8/4K3/8/6k1/8 w - - 0 1\n"
send "go depth 5\n"
expect -re {score cp 20000|score mate}
expect "bestmove"
send "ucinewgame\n"
send "position fen 8/1P6/2B5/8/4K3/8/6k1/8 b - - 0 1\n"
send "go depth 5\n"
expect -re {score cp -20000|score mate}
expect "bestmove"
send "quit\n"
expect eof
# return error code of the spawned program, useful for Valgrind
lassign [wait] pid spawnid os_error_flag value
exit \$value
EOF
for exp in game.exp syzygy.exp
do
echo "======== $exp =============="
cat $exp
echo "============================"
echo "$prefix expect $exp $postfix"
eval "$prefix expect $exp $postfix"
rm $exp
done
rm -f tsan.supp bench_tmp.epd
echo "instrumented testing OK"

View file

@ -1,5 +1,5 @@
#!/bin/bash #!/bin/bash
# verify perft numbers (positions from www.chessprogramming.org/Perft_Results) # verify perft numbers (positions from https://www.chessprogramming.org/Perft_Results)
error() error()
{ {

378
tests/testing.py Normal file
View file

@ -0,0 +1,378 @@
import subprocess
from typing import List
import os
import collections
import time
import sys
import traceback
import fnmatch
from functools import wraps
from contextlib import redirect_stdout
import io
import tarfile
import pathlib
import concurrent.futures
import tempfile
import shutil
import requests
CYAN_COLOR = "\033[36m"
GRAY_COLOR = "\033[2m"
RED_COLOR = "\033[31m"
GREEN_COLOR = "\033[32m"
RESET_COLOR = "\033[0m"
WHITE_BOLD = "\033[1m"
MAX_TIMEOUT = 60 * 5
PATH = pathlib.Path(__file__).parent.resolve()
class Valgrind:
@staticmethod
def get_valgrind_command():
return [
"valgrind",
"--error-exitcode=42",
"--errors-for-leak-kinds=all",
"--leak-check=full",
]
@staticmethod
def get_valgrind_thread_command():
return ["valgrind", "--error-exitcode=42", "--fair-sched=try"]
class TSAN:
@staticmethod
def set_tsan_option():
with open(f"tsan.supp", "w") as f:
f.write(
"""
race:Stockfish::TTEntry::read
race:Stockfish::TTEntry::save
race:Stockfish::TranspositionTable::probe
race:Stockfish::TranspositionTable::hashfull
"""
)
os.environ["TSAN_OPTIONS"] = "suppressions=./tsan.supp"
@staticmethod
def unset_tsan_option():
os.environ.pop("TSAN_OPTIONS", None)
os.remove(f"tsan.supp")
class EPD:
@staticmethod
def create_bench_epd():
with open(f"{os.path.join(PATH,'bench_tmp.epd')}", "w") as f:
f.write(
"""
Rn6/1rbq1bk1/2p2n1p/2Bp1p2/3Pp1pP/1N2P1P1/2Q1NPB1/6K1 w - - 2 26
rnbqkb1r/ppp1pp2/5n1p/3p2p1/P2PP3/5P2/1PP3PP/RNBQKBNR w KQkq - 0 3
3qnrk1/4bp1p/1p2p1pP/p2bN3/1P1P1B2/P2BQ3/5PP1/4R1K1 w - - 9 28
r4rk1/1b2ppbp/pq4pn/2pp1PB1/1p2P3/1P1P1NN1/1PP3PP/R2Q1RK1 w - - 0 13
"""
)
@staticmethod
def delete_bench_epd():
os.remove(f"{os.path.join(PATH,'bench_tmp.epd')}")
class Syzygy:
@staticmethod
def get_syzygy_path():
return os.path.abspath("syzygy")
@staticmethod
def download_syzygy():
if not os.path.isdir(os.path.join(PATH, "syzygy")):
url = "https://api.github.com/repos/niklasf/python-chess/tarball/9b9aa13f9f36d08aadfabff872882f4ab1494e95"
file = "niklasf-python-chess-9b9aa13"
with tempfile.TemporaryDirectory() as tmpdirname:
tarball_path = os.path.join(tmpdirname, f"{file}.tar.gz")
response = requests.get(url, stream=True)
with open(tarball_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
with tarfile.open(tarball_path, "r:gz") as tar:
tar.extractall(tmpdirname)
shutil.move(os.path.join(tmpdirname, file), os.path.join(PATH, "syzygy"))
class OrderedClassMembers(type):
@classmethod
def __prepare__(self, name, bases):
return collections.OrderedDict()
def __new__(self, name, bases, classdict):
classdict["__ordered__"] = [
key for key in classdict.keys() if key not in ("__module__", "__qualname__")
]
return type.__new__(self, name, bases, classdict)
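A rough illustration of what the metaclass provides (the class and method names below are invented, not part of the file): attributes are recorded in definition order, which is what MiniTestFramework later relies on to run test_* methods top to bottom.
class _Probe(metaclass=OrderedClassMembers):  # hypothetical example class
    def test_zeta(self):
        pass

    def test_alpha(self):
        pass

# Definition order is preserved rather than sorted alphabetically:
# _Probe.__ordered__ == ["test_zeta", "test_alpha"]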
class TimeoutException(Exception):
def __init__(self, message: str, timeout: int):
self.message = message
self.timeout = timeout
def timeout_decorator(timeout: float):
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(func, *args, **kwargs)
try:
result = future.result(timeout=timeout)
except concurrent.futures.TimeoutError:
raise TimeoutException(
f"Function {func.__name__} timed out after {timeout} seconds",
timeout,
)
return result
return wrapper
return decorator
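A small usage sketch of the decorator (the helper below is hypothetical): the wrapped call runs in a worker thread and is reported as timed out once future.result exceeds the limit.
@timeout_decorator(2)  # hypothetical helper, not part of the test suite
def _slow_probe():
    time.sleep(5)

# Calling _slow_probe() raises TimeoutException with timeout == 2. Note that the
# ThreadPoolExecutor context manager still waits for the worker thread to finish
# before the exception reaches the caller, so the wrapped call is not killed early.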
class MiniTestFramework:
def __init__(self):
self.passed_test_suites = 0
self.failed_test_suites = 0
self.passed_tests = 0
self.failed_tests = 0
def has_failed(self) -> bool:
return self.failed_test_suites > 0
def run(self, classes: List[type]) -> bool:
self.start_time = time.time()
for test_class in classes:
with tempfile.TemporaryDirectory() as tmpdirname:
original_cwd = os.getcwd()
os.chdir(tmpdirname)
try:
if self.__run(test_class):
self.failed_test_suites += 1
else:
self.passed_test_suites += 1
finally:
os.chdir(original_cwd)
self.__print_summary(round(time.time() - self.start_time, 2))
return self.has_failed()
def __run(self, test_class) -> bool:
test_instance = test_class()
test_name = test_instance.__class__.__name__
test_methods = [m for m in test_instance.__ordered__ if m.startswith("test_")]
print(f"\nTest Suite: {test_name}")
if hasattr(test_instance, "beforeAll"):
test_instance.beforeAll()
fails = 0
for method in test_methods:
fails += self.__run_test_method(test_instance, method)
if hasattr(test_instance, "afterAll"):
test_instance.afterAll()
self.failed_tests += fails
return fails > 0
def __run_test_method(self, test_instance, method: str) -> int:
print(f" Running {method}... \r", end="", flush=True)
buffer = io.StringIO()
fails = 0
try:
t0 = time.time()
with redirect_stdout(buffer):
if hasattr(test_instance, "beforeEach"):
test_instance.beforeEach()
getattr(test_instance, method)()
if hasattr(test_instance, "afterEach"):
test_instance.afterEach()
duration = time.time() - t0
self.print_success(f" {method} ({duration * 1000:.2f}ms)")
self.passed_tests += 1
except Exception as e:
if isinstance(e, TimeoutException):
self.print_failure(
f" {method} (hit execution limit of {e.timeout} seconds)"
)
if isinstance(e, AssertionError):
self.__handle_assertion_error(t0, method)
fails += 1
finally:
self.__print_buffer_output(buffer)
return fails
def __handle_assertion_error(self, start_time, method: str):
duration = time.time() - start_time
self.print_failure(f" {method} ({duration * 1000:.2f}ms)")
traceback_output = "".join(traceback.format_tb(sys.exc_info()[2]))
colored_traceback = "\n".join(
f" {CYAN_COLOR}{line}{RESET_COLOR}"
for line in traceback_output.splitlines()
)
print(colored_traceback)
def __print_buffer_output(self, buffer: io.StringIO):
output = buffer.getvalue()
if output:
indented_output = "\n".join(f" {line}" for line in output.splitlines())
print(f" {RED_COLOR}⎯⎯⎯⎯⎯OUTPUT⎯⎯⎯⎯⎯{RESET_COLOR}")
print(f"{GRAY_COLOR}{indented_output}{RESET_COLOR}")
print(f" {RED_COLOR}⎯⎯⎯⎯⎯OUTPUT⎯⎯⎯⎯⎯{RESET_COLOR}")
def __print_summary(self, duration: float):
print(f"\n{WHITE_BOLD}Test Summary{RESET_COLOR}\n")
print(
f" Test Suites: {GREEN_COLOR}{self.passed_test_suites} passed{RESET_COLOR}, {RED_COLOR}{self.failed_test_suites} failed{RESET_COLOR}, {self.passed_test_suites + self.failed_test_suites} total"
)
print(
f" Tests: {GREEN_COLOR}{self.passed_tests} passed{RESET_COLOR}, {RED_COLOR}{self.failed_tests} failed{RESET_COLOR}, {self.passed_tests + self.failed_tests} total"
)
print(f" Time: {duration}s\n")
def print_failure(self, add: str):
print(f" {RED_COLOR}{RESET_COLOR}{add}", flush=True)
def print_success(self, add: str):
print(f" {GREEN_COLOR}{RESET_COLOR}{add}", flush=True)
class Stockfish:
def __init__(
self,
prefix: List[str],
path: str,
args: List[str] = [],
cli: bool = False,
):
self.path = path
self.process = None
self.args = args
self.cli = cli
self.prefix = prefix
self.output = []
self.start()
def start(self):
if self.cli:
self.process = subprocess.run(
self.prefix + [self.path] + self.args,
capture_output=True,
text=True,
)
self.process.stdout
return
self.process = subprocess.Popen(
self.prefix + [self.path] + self.args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True,
bufsize=1,
)
def setoption(self, name: str, value: str):
self.send_command(f"setoption name {name} value {value}")
def send_command(self, command: str):
if not self.process:
raise RuntimeError("Stockfish process is not started")
self.process.stdin.write(command + "\n")
self.process.stdin.flush()
@timeout_decorator(MAX_TIMEOUT)
def equals(self, expected_output: str):
for line in self.readline():
if line == expected_output:
return
@timeout_decorator(MAX_TIMEOUT)
def expect(self, expected_output: str):
for line in self.readline():
if fnmatch.fnmatch(line, expected_output):
return
@timeout_decorator(MAX_TIMEOUT)
def contains(self, expected_output: str):
for line in self.readline():
if expected_output in line:
return
@timeout_decorator(MAX_TIMEOUT)
def starts_with(self, expected_output: str):
for line in self.readline():
if line.startswith(expected_output):
return
@timeout_decorator(MAX_TIMEOUT)
def check_output(self, callback):
if not callback:
raise ValueError("Callback function is required")
for line in self.readline():
if callback(line) == True:
return
def readline(self):
if not self.process:
raise RuntimeError("Stockfish process is not started")
while True:
line = self.process.stdout.readline().strip()
self.output.append(line)
yield line
def clear_output(self):
self.output = []
def get_output(self) -> List[str]:
return self.output
def quit(self):
self.send_command("quit")
def close(self):
if self.process:
self.process.stdin.close()
self.process.stdout.close()
return self.process.wait()
return 0
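Taken together, the wrapper supports a one-shot CLI mode (cli=True, backed by subprocess.run) and an interactive mode driven over stdin/stdout, where the matcher methods block for up to MAX_TIMEOUT seconds. A minimal sketch, assuming a ./stockfish binary in the working directory:
# Interactive mode: matchers block until a line of engine output matches.
sf = Stockfish([], "./stockfish")
sf.send_command("uci")
sf.equals("uciok")
sf.quit()
assert sf.close() == 0

# One-shot mode: the full command line is passed as args and the completed
# process is inspected directly.
bench = Stockfish([], "./stockfish", "bench 16 1 2 default depth".split(" "), cli=True)
assert bench.process.returncode == 0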