diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..c71f0368 --- /dev/null +++ b/.clang-format @@ -0,0 +1,44 @@ +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: Consecutive +AlignConsecutiveDeclarations: Consecutive +AlignEscapedNewlines: DontAlign +AlignOperands: AlignAfterOperator +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: false +AlwaysBreakTemplateDeclarations: Yes +BasedOnStyle: WebKit +BitFieldColonSpacing: After +BinPackParameters: false +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Custom +BraceWrapping: + AfterFunction: false + AfterClass: false + AfterControlStatement: true + BeforeElse: true +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: AfterColon +BreakStringLiterals: false +ColumnLimit: 100 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +IndentGotoLabels: false +IndentPPDirectives: BeforeHash +IndentWidth: 4 +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: None +PackConstructorInitializers: Never +ReflowComments: false +SortIncludes: false +SortUsingDeclarations: false +SpaceAfterCStyleCast: true +SpaceAfterTemplateKeyword: false +SpaceBeforeCaseColon: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeInheritanceColon: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 2 diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..d2d6cfe6 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,7 @@ +# .git-blame-ignore-revs +# Ignore commit which added clang-format +2d0237db3f0e596fb06e3ffbadba84dcc4e018f6 + +# Post commit formatting fixes +0fca5605fa2e5e7240fde5e1aae50952b2612231 +08ed4c90db31959521b7ef3186c026edd1e90307 \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/BUG-REPORT.yml b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml new file mode 100644 index 00000000..e46d2bf8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml @@ -0,0 +1,65 @@ +name: Report issue +description: Create a report to help us fix issues with the engine +body: +- type: textarea + attributes: + label: Describe the issue + description: A clear and concise description of what you're experiencing. + validations: + required: true + +- type: textarea + attributes: + label: Expected behavior + description: A clear and concise description of what you expected to happen. + validations: + required: true + +- type: textarea + attributes: + label: Steps to reproduce + description: | + Steps to reproduce the behavior. + You can also use this section to paste the command line output. + placeholder: | + ``` + position startpos moves g2g4 e7e5 f2f3 + go mate 1 + info string NNUE evaluation using nn-6877cd24400e.nnue enabled + info depth 1 seldepth 1 multipv 1 score mate 1 nodes 33 nps 11000 tbhits 0 time 3 pv d8h4 + bestmove d8h4 + ``` + validations: + required: true + +- type: textarea + attributes: + label: Anything else? + description: | + Anything that will give us more context about the issue you are encountering. + You can also use this section to propose ideas on how to solve the issue. + validations: + required: false + +- type: dropdown + attributes: + label: Operating system + options: + - All + - Windows + - Linux + - MacOS + - Android + - Other or N/A + validations: + required: true + +- type: input + attributes: + label: Stockfish version + description: | + This can be found by running the engine. + You can also use the commit ID. + placeholder: Stockfish 15 / e6e324e + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..0666eb32 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Discord server + url: https://discord.gg/GWDRS3kU6R + about: Feel free to ask for support or have a chat with us on our Discord server! + - name: Discussions, Q&A, ideas, show us something... + url: https://github.com/official-stockfish/Stockfish/discussions/new + about: Do you have an idea for Stockfish? Do you want to show something that you made? Please open a discussion about it! diff --git a/.github/ci/arm_matrix.json b/.github/ci/arm_matrix.json new file mode 100644 index 00000000..70f2efaa --- /dev/null +++ b/.github/ci/arm_matrix.json @@ -0,0 +1,51 @@ +{ + "config": [ + { + "name": "Android NDK aarch64", + "os": "ubuntu-22.04", + "simple_name": "android", + "compiler": "aarch64-linux-android21-clang++", + "emu": "qemu-aarch64", + "comp": "ndk", + "shell": "bash", + "archive_ext": "tar" + }, + { + "name": "Android NDK arm", + "os": "ubuntu-22.04", + "simple_name": "android", + "compiler": "armv7a-linux-androideabi21-clang++", + "emu": "qemu-arm", + "comp": "ndk", + "shell": "bash", + "archive_ext": "tar" + } + ], + "binaries": ["armv8-dotprod", "armv8", "armv7", "armv7-neon"], + "exclude": [ + { + "binaries": "armv8-dotprod", + "config": { + "compiler": "armv7a-linux-androideabi21-clang++" + } + }, + { + "binaries": "armv8", + "config": { + "compiler": "armv7a-linux-androideabi21-clang++" + } + }, + { + "binaries": "armv7", + "config": { + "compiler": "aarch64-linux-android21-clang++" + } + }, + { + "binaries": "armv7-neon", + "config": { + "compiler": "aarch64-linux-android21-clang++" + } + } + ] +} diff --git a/.github/ci/libcxx17.imp b/.github/ci/libcxx17.imp new file mode 100644 index 00000000..7bdcf5bc --- /dev/null +++ b/.github/ci/libcxx17.imp @@ -0,0 +1,21 @@ +[ + # Mappings for libcxx's internal headers + { include: [ "<__fwd/fstream.h>", private, "", public ] }, + { include: [ "<__fwd/ios.h>", private, "", public ] }, + { include: [ "<__fwd/istream.h>", private, "", public ] }, + { include: [ "<__fwd/ostream.h>", private, "", public ] }, + { include: [ "<__fwd/sstream.h>", private, "", public ] }, + { include: [ "<__fwd/streambuf.h>", private, "", public ] }, + { include: [ "<__fwd/string_view.h>", private, "", public ] }, + + # Mappings for includes between public headers + { include: [ "", public, "", public ] }, + { include: [ "", public, "", public ] }, + { include: [ "", public, "", public ] }, + { include: [ "", public, "", public ] }, + { include: [ "", public, "", public ] }, + + # Missing mappings in include-what-you-use's libcxx.imp + { include: ["@<__condition_variable/.*>", private, "", public ] }, + { include: ["@<__mutex/.*>", private, "", public ] }, +] diff --git a/.github/ci/matrix.json b/.github/ci/matrix.json new file mode 100644 index 00000000..c6563ead --- /dev/null +++ b/.github/ci/matrix.json @@ -0,0 +1,160 @@ +{ + "config": [ + { + "name": "Ubuntu 20.04 GCC", + "os": "ubuntu-20.04", + "simple_name": "ubuntu", + "compiler": "g++", + "comp": "gcc", + "shell": "bash", + "archive_ext": "tar", + "sde": "/home/runner/work/Stockfish/Stockfish/.output/sde-temp-files/sde-external-9.27.0-2023-09-13-lin/sde -future --" + }, + { + "name": "MacOS 13 Apple Clang", + "os": "macos-13", + "simple_name": "macos", + "compiler": "clang++", + "comp": "clang", + "shell": "bash", + "archive_ext": "tar" + }, + { + "name": "MacOS 14 Apple Clang M1", + "os": "macos-14", + "simple_name": "macos-m1", + "compiler": "clang++", + "comp": "clang", + "shell": "bash", + "archive_ext": "tar" + }, + { + "name": "Windows 2022 Mingw-w64 GCC x86_64", + "os": "windows-2022", + "simple_name": "windows", + "compiler": "g++", + "comp": "mingw", + "msys_sys": "mingw64", + "msys_env": "x86_64-gcc", + "shell": "msys2 {0}", + "ext": ".exe", + "sde": "/d/a/Stockfish/Stockfish/.output/sde-temp-files/sde-external-9.27.0-2023-09-13-win/sde.exe -future --", + "archive_ext": "zip" + } + ], + "binaries": [ + "x86-64", + "x86-64-sse41-popcnt", + "x86-64-avx2", + "x86-64-bmi2", + "x86-64-avxvnni", + "x86-64-avx512", + "x86-64-vnni256", + "x86-64-vnni512", + "apple-silicon" + ], + "exclude": [ + { + "binaries": "x86-64", + "config": { + "os": "macos-14" + } + }, + { + "binaries": "x86-64-sse41-popcnt", + "config": { + "os": "macos-14" + } + }, + { + "binaries": "x86-64-avx2", + "config": { + "os": "macos-14" + } + }, + { + "binaries": "x86-64-bmi2", + "config": { + "os": "macos-14" + } + }, + { + "binaries": "x86-64-avxvnni", + "config": { + "os": "macos-14" + } + }, + { + "binaries": "x86-64-avxvnni", + "config": { + "os": "macos-14" + } + }, + { + "binaries": "x86-64-avx512", + "config": { + "os": "macos-14" + } + }, + { + "binaries": "x86-64-vnni256", + "config": { + "os": "macos-14" + } + }, + { + "binaries": "x86-64-vnni512", + "config": { + "os": "macos-14" + } + }, + { + "binaries": "x86-64-avxvnni", + "config": { + "ubuntu-20.04": null + } + }, + { + "binaries": "x86-64-avxvnni", + "config": { + "os": "macos-13" + } + }, + { + "binaries": "x86-64-avx512", + "config": { + "os": "macos-13" + } + }, + { + "binaries": "x86-64-vnni256", + "config": { + "os": "macos-13" + } + }, + { + "binaries": "x86-64-vnni512", + "config": { + "os": "macos-13" + } + }, + { + "binaries": "apple-silicon", + "config": { + "os": "windows-2022" + } + }, + { + "binaries": "apple-silicon", + "config": { + "os": "macos-13" + } + }, + { + "binaries": "apple-silicon", + "config": { + "os": "ubuntu-20.04" + } + } + ] +} diff --git a/.github/workflows/arm_compilation.yml b/.github/workflows/arm_compilation.yml new file mode 100644 index 00000000..3934ac2d --- /dev/null +++ b/.github/workflows/arm_compilation.yml @@ -0,0 +1,94 @@ +name: Compilation +on: + workflow_call: + inputs: + matrix: + type: string + required: true +jobs: + Compilation: + name: ${{ matrix.config.name }} ${{ matrix.binaries }} + runs-on: ${{ matrix.config.os }} + env: + COMPCXX: ${{ matrix.config.compiler }} + COMP: ${{ matrix.config.comp }} + EMU: ${{ matrix.config.emu }} + EXT: ${{ matrix.config.ext }} + BINARY: ${{ matrix.binaries }} + strategy: + fail-fast: false + matrix: ${{ fromJson(inputs.matrix) }} + defaults: + run: + working-directory: src + shell: ${{ matrix.config.shell }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Download required linux packages + if: runner.os == 'Linux' + run: | + sudo apt update + sudo apt install qemu-user + + - name: Install NDK + if: runner.os == 'Linux' + run: | + if [ $COMP == ndk ]; then + NDKV="21.4.7075529" + ANDROID_ROOT=/usr/local/lib/android + ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk + SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager + echo "y" | $SDKMANAGER "ndk;$NDKV" + ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV + ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin + echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV + fi + + - name: Extract the bench number from the commit history + run: | + for hash in $(git rev-list -100 HEAD); do + benchref=$(git show -s $hash | tac | grep -m 1 -o -x '[[:space:]]*\b[Bb]ench[ :]\+[1-9][0-9]\{5,7\}\b[[:space:]]*' | sed 's/[^0-9]//g') && break || true + done + [[ -n "$benchref" ]] && echo "benchref=$benchref" >> $GITHUB_ENV && echo "From commit: $hash" && echo "Reference bench: $benchref" || echo "No bench found" + + - name: Download the used network from the fishtest framework + run: make net + + - name: Check compiler + run: | + if [ $COMP == ndk ]; then + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + fi + $COMPCXX -v + + - name: Test help target + run: make help + + - name: Check git + run: git --version + + # Compile profile guided builds + + - name: Compile ${{ matrix.binaries }} build + run: | + if [ $COMP == ndk ]; then + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + export LDFLAGS="-static -Wno-unused-command-line-argument" + fi + make clean + make -j4 profile-build ARCH=$BINARY COMP=$COMP WINE_PATH=$EMU + make strip ARCH=$BINARY COMP=$COMP + WINE_PATH=$EMU ../tests/signature.sh $benchref + mv ./stockfish$EXT ../stockfish-android-$BINARY$EXT + + - name: Remove non src files + run: git clean -fx + + - name: Upload artifact for (pre)-release + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} + path: . diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml new file mode 100644 index 00000000..e20e0d5d --- /dev/null +++ b/.github/workflows/clang-format.yml @@ -0,0 +1,51 @@ +# This workflow will run clang-format and comment on the PR. +# Because of security reasons, it is crucial that this workflow +# executes no shell script nor runs make. +# Read this before editing: https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ + +name: Clang-Format +on: + pull_request_target: + branches: + - "master" + paths: + - "**.cpp" + - "**.h" +jobs: + Clang-Format: + name: Clang-Format + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Run clang-format style check + uses: jidicula/clang-format-action@f62da5e3d3a2d88ff364771d9d938773a618ab5e # @v4.11.0 + id: clang-format + continue-on-error: true + with: + clang-format-version: "17" + exclude-regex: "incbin" + + - name: Comment on PR + if: steps.clang-format.outcome == 'failure' + uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0 + with: + message: | + clang-format 17 needs to be run on this PR. + If you do not have clang-format installed, the maintainer will run it when merging. + For the exact version please see https://packages.ubuntu.com/mantic/clang-format-17. + + _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_ + comment_tag: execution + + - name: Comment on PR + if: steps.clang-format.outcome != 'failure' + uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0 + with: + message: | + _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_ + create_if_not_exists: false + comment_tag: execution + mode: delete diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..d949a5a7 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,53 @@ +name: "CodeQL" + +on: + push: + branches: ["master"] + pull_request: + # The branches below must be a subset of the branches above + branches: ["master"] + schedule: + - cron: "17 18 * * 1" + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: ["cpp"] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + # Use only 'java' to analyze code written in Java, Kotlin, or both + # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + - name: Build + working-directory: src + run: make -j build ARCH=x86-64-modern + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml new file mode 100644 index 00000000..3524d5e9 --- /dev/null +++ b/.github/workflows/compilation.yml @@ -0,0 +1,89 @@ +name: Compilation +on: + workflow_call: + inputs: + matrix: + type: string + required: true +jobs: + Compilation: + name: ${{ matrix.config.name }} ${{ matrix.binaries }} + runs-on: ${{ matrix.config.os }} + env: + COMPCXX: ${{ matrix.config.compiler }} + COMP: ${{ matrix.config.comp }} + EXT: ${{ matrix.config.ext }} + NAME: ${{ matrix.config.simple_name }} + BINARY: ${{ matrix.binaries }} + SDE: ${{ matrix.config.sde }} + strategy: + fail-fast: false + matrix: ${{ fromJson(inputs.matrix) }} + defaults: + run: + working-directory: src + shell: ${{ matrix.config.shell }} + steps: + - uses: actions/checkout@v4 + + - name: Install fixed GCC on Linux + if: runner.os == 'Linux' + uses: egor-tensin/setup-gcc@eaa888eb19115a521fa72b65cd94fe1f25bbcaac # @v1.3 + with: + version: 11 + + - name: Setup msys and install required packages + if: runner.os == 'Windows' + uses: msys2/setup-msys2@v2 + with: + msystem: ${{ matrix.config.msys_sys }} + install: mingw-w64-${{ matrix.config.msys_env }} make git zip + + - name: Download SDE package + if: runner.os == 'Linux' || runner.os == 'Windows' + uses: petarpetrovt/setup-sde@91a1a03434384e064706634125a15f7446d2aafb # @v2.3 + with: + environmentVariableName: SDE_DIR + sdeVersion: 9.27.0 + + - name: Download the used network from the fishtest framework + run: make net + + - name: Check compiler + run: $COMPCXX -v + + - name: Test help target + run: make help + + - name: Check git + run: git --version + + - name: Check compiler + run: $COMPCXX -v + + - name: Show g++ cpu info + if: runner.os != 'macOS' + run: g++ -Q -march=native --help=target + + - name: Show clang++ cpu info + if: runner.os == 'macOS' + run: clang++ -E - -march=native -### + + # x86-64 with newer extensions tests + + - name: Compile ${{ matrix.config.binaries }} build + run: | + make clean + make -j4 profile-build ARCH=$BINARY COMP=$COMP WINE_PATH="$SDE" + make strip ARCH=$BINARY COMP=$COMP + WINE_PATH="$SDE" ../tests/signature.sh $benchref + mv ./stockfish$EXT ../stockfish-$NAME-$BINARY$EXT + + - name: Remove non src files + run: git clean -fx + + - name: Upload artifact for (pre)-release + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} + path: . diff --git a/.github/workflows/iwyu.yml b/.github/workflows/iwyu.yml new file mode 100644 index 00000000..0552a598 --- /dev/null +++ b/.github/workflows/iwyu.yml @@ -0,0 +1,47 @@ +name: IWYU +on: + workflow_call: +jobs: + Analyzers: + name: Check includes + runs-on: ubuntu-22.04 + defaults: + run: + working-directory: Stockfish/src + shell: bash + steps: + - name: Checkout Stockfish + uses: actions/checkout@v4 + with: + path: Stockfish + + - name: Checkout include-what-you-use + uses: actions/checkout@v4 + with: + repository: include-what-you-use/include-what-you-use + ref: f25caa280dc3277c4086ec345ad279a2463fea0f + path: include-what-you-use + + - name: Download required linux packages + run: | + sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main' + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - + sudo apt update + sudo apt install -y libclang-17-dev clang-17 libc++-17-dev + + - name: Set up include-what-you-use + run: | + mkdir build && cd build + cmake -G "Unix Makefiles" -DCMAKE_PREFIX_PATH="/usr/lib/llvm-17" .. + sudo make install + working-directory: include-what-you-use + + - name: Check include-what-you-use + run: include-what-you-use --version + + - name: Check includes + run: > + make analyze + COMP=clang + CXX=include-what-you-use + CXXFLAGS="-stdlib=libc++ -Xiwyu --comment_style=long -Xiwyu --mapping='${{ github.workspace }}/Stockfish/.github/ci/libcxx17.imp' -Xiwyu --error" diff --git a/.github/workflows/sanitizers.yml b/.github/workflows/sanitizers.yml new file mode 100644 index 00000000..b75c06cf --- /dev/null +++ b/.github/workflows/sanitizers.yml @@ -0,0 +1,76 @@ +name: Sanitizers +on: + workflow_call: +jobs: + Test-under-sanitizers: + name: ${{ matrix.sanitizers.name }} + runs-on: ${{ matrix.config.os }} + env: + COMPCXX: ${{ matrix.config.compiler }} + COMP: ${{ matrix.config.comp }} + CXXFLAGS: "-Werror" + strategy: + fail-fast: false + matrix: + config: + - name: Ubuntu 22.04 GCC + os: ubuntu-22.04 + compiler: g++ + comp: gcc + shell: bash + sanitizers: + - name: Run with thread sanitizer + make_option: sanitize=thread + instrumented_option: sanitizer-thread + - name: Run with UB sanitizer + make_option: sanitize=undefined + instrumented_option: sanitizer-undefined + - name: Run under valgrind + make_option: "" + instrumented_option: valgrind + - name: Run under valgrind-thread + make_option: "" + instrumented_option: valgrind-thread + - name: Run non-instrumented + make_option: "" + instrumented_option: none + defaults: + run: + working-directory: src + shell: ${{ matrix.config.shell }} + steps: + - uses: actions/checkout@v4 + + - name: Download required linux packages + run: | + sudo apt update + sudo apt install expect valgrind g++-multilib + + - name: Download the used network from the fishtest framework + run: make net + + - name: Check compiler + run: $COMPCXX -v + + - name: Test help target + run: make help + + - name: Check git + run: git --version + + # Since Linux Kernel 6.5 we are getting false positives from the ci, + # lower the ALSR entropy to disable ALSR, which works as a temporary workaround. + # https://github.com/google/sanitizers/issues/1716 + # https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2056762 + + - name: Lower ALSR entropy + run: sudo sysctl -w vm.mmap_rnd_bits=28 + + # Sanitizers + + - name: ${{ matrix.sanitizers.name }} + run: | + export CXXFLAGS="-O1 -fno-inline" + make clean + make -j4 ARCH=x86-64-sse41-popcnt ${{ matrix.sanitizers.make_option }} debug=yes optimize=no build > /dev/null + ../tests/instrumented.sh --${{ matrix.sanitizers.instrumented_option }} diff --git a/.github/workflows/stockfish.yml b/.github/workflows/stockfish.yml index f1741ed8..13d57f9e 100644 --- a/.github/workflows/stockfish.yml +++ b/.github/workflows/stockfish.yml @@ -1,333 +1,104 @@ name: Stockfish on: push: + tags: + - "*" branches: - master - tools - github_ci - - github_ci_armv7 pull_request: branches: - master - tools jobs: - Stockfish: - name: ${{ matrix.config.name }} - runs-on: ${{ matrix.config.os }} - env: - COMPILER: ${{ matrix.config.compiler }} - COMP: ${{ matrix.config.comp }} - CXXFLAGS: "-Werror" - strategy: - matrix: - config: - # set the variable for the required tests: - # run_expensive_tests: true - # run_32bit_tests: true - # run_64bit_tests: true - # run_armv8_tests: true - # run_armv7_tests: true - - { - name: "Ubuntu 20.04 GCC", - os: ubuntu-20.04, - compiler: g++, - comp: gcc, - run_expensive_tests: true, - run_32bit_tests: true, - run_64bit_tests: true, - shell: 'bash {0}' - } - - { - name: "Ubuntu 20.04 Clang", - os: ubuntu-20.04, - compiler: clang++, - comp: clang, - run_32bit_tests: true, - run_64bit_tests: true, - shell: 'bash {0}' - } - - { - name: "Ubuntu 20.04 NDK armv8", - os: ubuntu-20.04, - compiler: aarch64-linux-android21-clang++, - comp: ndk, - run_armv8_tests: true, - shell: 'bash {0}' - } - - { - name: "Ubuntu 20.04 NDK armv7", - os: ubuntu-20.04, - compiler: armv7a-linux-androideabi21-clang++, - comp: ndk, - run_armv7_tests: true, - shell: 'bash {0}' - } - - { - name: "MacOS 10.15 Apple Clang", - os: macos-10.15, - compiler: clang++, - comp: clang, - run_64bit_tests: true, - shell: 'bash {0}' - } - - { - name: "MacOS 10.15 GCC 10", - os: macos-10.15, - compiler: g++-10, - comp: gcc, - run_64bit_tests: true, - shell: 'bash {0}' - } - - { - name: "Windows 2022 Mingw-w64 GCC x86_64", - os: windows-2022, - compiler: g++, - comp: gcc, - run_64bit_tests: true, - msys_sys: 'mingw64', - msys_env: 'x86_64', - shell: 'msys2 {0}' - } - - { - name: "Windows 2022 Mingw-w64 GCC i686", - os: windows-2022, - compiler: g++, - comp: gcc, - run_32bit_tests: true, - msys_sys: 'mingw32', - msys_env: 'i686', - shell: 'msys2 {0}' - } - - { - name: "Windows 2022 Mingw-w64 Clang x86_64", - os: windows-2022, - compiler: clang++, - comp: clang, - run_64bit_tests: true, - msys_sys: 'clang64', - msys_env: 'clang-x86_64', - shell: 'msys2 {0}' - } - - defaults: - run: - working-directory: src - shell: ${{ matrix.config.shell }} + Prerelease: + if: github.repository == 'official-stockfish/Stockfish' && (github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag')) + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + + # returns null if no pre-release exists + - name: Get Commit SHA of Latest Pre-release + run: | + # Install required packages + sudo apt-get update + sudo apt-get install -y curl jq + + echo "COMMIT_SHA_TAG=$(jq -r 'map(select(.prerelease)) | first | .tag_name' <<< $(curl -s https://api.github.com/repos/${{ github.repository_owner }}/Stockfish/releases))" >> $GITHUB_ENV + + # delete old previous pre-release and tag + - run: gh release delete ${{ env.COMMIT_SHA_TAG }} --cleanup-tag + if: env.COMMIT_SHA_TAG != 'null' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # Make sure that an old ci that still runs on master doesn't recreate a prerelease + - name: Check Pullable Commits + id: check_commits + run: | + git fetch + CHANGES=$(git rev-list HEAD..origin/master --count) + echo "CHANGES=$CHANGES" >> $GITHUB_ENV + + - name: Get last commit SHA + id: last_commit + run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV + + - name: Get commit date + id: commit_date + run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV + + # Create a new pre-release, the other upload_binaries.yml will upload the binaries + # to this pre-release. + - name: Create Prerelease + if: github.ref_name == 'master' && env.CHANGES == '0' + uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981 with: - fetch-depth: 0 + name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} + tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} + prerelease: true - - name: Download required linux packages - if: runner.os == 'Linux' + Matrix: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + arm_matrix: ${{ steps.set-arm-matrix.outputs.arm_matrix }} + steps: + - uses: actions/checkout@v4 + - id: set-matrix run: | - sudo apt update - sudo apt install expect valgrind g++-multilib qemu-user - - - name: Setup msys and install required packages - if: runner.os == 'Windows' - uses: msys2/setup-msys2@v2 - with: - msystem: ${{matrix.config.msys_sys}} - install: mingw-w64-${{matrix.config.msys_env}}-${{matrix.config.comp}} make git expect - - - name: Download the used network from the fishtest framework + TASKS=$(echo $(cat .github/ci/matrix.json) ) + echo "MATRIX=$TASKS" >> $GITHUB_OUTPUT + - id: set-arm-matrix run: | - make net - - - name: Extract the bench number from the commit history - run: | - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig - [ -s git_sig ] && echo "benchref=$(cat git_sig)" >> $GITHUB_ENV && echo "Reference bench:" $(cat git_sig) || echo "No bench found" - - - name: Check compiler - run: | - export PATH=$PATH:$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin - $COMPILER -v - - - name: Test help target - run: | - make help - - # x86-32 tests - - - name: Test debug x86-32 build - if: ${{ matrix.config.run_32bit_tests }} - run: | - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" - make clean - make -j2 ARCH=x86-32 optimize=no debug=yes build - ../tests/signature.sh $benchref - - - name: Test x86-32 build - if: ${{ matrix.config.run_32bit_tests }} - run: | - make clean - make -j2 ARCH=x86-32 build - ../tests/signature.sh $benchref - - - name: Test x86-32-sse41-popcnt build - if: ${{ matrix.config.run_32bit_tests }} - run: | - make clean - make -j2 ARCH=x86-32-sse41-popcnt build - ../tests/signature.sh $benchref - - - name: Test x86-32-sse2 build - if: ${{ matrix.config.run_32bit_tests }} - run: | - make clean - make -j2 ARCH=x86-32-sse2 build - ../tests/signature.sh $benchref - - - name: Test general-32 build - if: ${{ matrix.config.run_32bit_tests }} - run: | - make clean - make -j2 ARCH=general-32 build - ../tests/signature.sh $benchref - - # x86-64 tests - - - name: Test debug x86-64-modern build - if: ${{ matrix.config.run_64bit_tests }} - run: | - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" - make clean - make -j2 ARCH=x86-64-modern optimize=no debug=yes build - ../tests/signature.sh $benchref - - - name: Test x86-64-modern build - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64-modern build - ../tests/signature.sh $benchref - - - name: Test x86-64-ssse3 build - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64-ssse3 build - ../tests/signature.sh $benchref - - - name: Test x86-64-sse3-popcnt build - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64-sse3-popcnt build - ../tests/signature.sh $benchref - - - name: Test x86-64 build - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64 build - ../tests/signature.sh $benchref - - - name: Test general-64 build - if: matrix.config.run_64bit_tests - run: | - make clean - make -j2 ARCH=general-64 build - ../tests/signature.sh $benchref - - # x86-64 with newer extensions tests - - - name: Compile x86-64-avx2 build - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64-avx2 build - - - name: Compile x86-64-bmi2 build - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64-bmi2 build - - - name: Compile x86-64-avx512 build - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64-avx512 build - - - name: Compile x86-64-vnni512 build - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64-vnni512 build - - - name: Compile x86-64-vnni256 build - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64-vnni256 build - - # armv8 tests - - - name: Test armv8 build - if: ${{ matrix.config.run_armv8_tests }} - run: | - export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH - export LDFLAGS="-static -Wno-unused-command-line-argument" - make clean - make -j2 ARCH=armv8 build - ../tests/signature.sh $benchref - - # armv7 tests - - - name: Test armv7 build - if: ${{ matrix.config.run_armv7_tests }} - run: | - export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH - export LDFLAGS="-static -Wno-unused-command-line-argument" - make clean - make -j2 ARCH=armv7 build - ../tests/signature.sh $benchref - - - name: Test armv7-neon build - if: ${{ matrix.config.run_armv7_tests }} - run: | - export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH - export LDFLAGS="-static -Wno-unused-command-line-argument" - make clean - make -j2 ARCH=armv7-neon build - ../tests/signature.sh $benchref - - # Other tests - - - name: Check perft and search reproducibility - if: ${{ matrix.config.run_64bit_tests }} - run: | - make clean - make -j2 ARCH=x86-64-modern build - ../tests/perft.sh - ../tests/reprosearch.sh - - # Sanitizers - - - name: Run under valgrind - if: ${{ matrix.config.run_expensive_tests }} - run: | - export CXXFLAGS="-O1 -fno-inline" - make clean - make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null - ../tests/instrumented.sh --valgrind - ../tests/instrumented.sh --valgrind-thread - - - name: Run with UB sanitizer - if: ${{ matrix.config.run_expensive_tests }} - run: | - export CXXFLAGS="-O1 -fno-inline" - make clean - make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null - ../tests/instrumented.sh --sanitizer-undefined - - - name: Run with thread sanitizer - if: ${{ matrix.config.run_expensive_tests }} - run: | - export CXXFLAGS="-O1 -fno-inline" - make clean - make -j2 ARCH=x86-64-modern sanitize=thread optimize=no debug=yes build > /dev/null - ../tests/instrumented.sh --sanitizer-thread + TASKS_ARM=$(echo $(cat .github/ci/arm_matrix.json) ) + echo "ARM_MATRIX=$TASKS_ARM" >> $GITHUB_OUTPUT + Compilation: + needs: [Matrix] + uses: ./.github/workflows/compilation.yml + with: + matrix: ${{ needs.Matrix.outputs.matrix }} + ARMCompilation: + needs: [Matrix] + uses: ./.github/workflows/arm_compilation.yml + with: + matrix: ${{ needs.Matrix.outputs.arm_matrix }} + IWYU: + uses: ./.github/workflows/iwyu.yml + Sanitizers: + uses: ./.github/workflows/sanitizers.yml + Tests: + uses: ./.github/workflows/tests.yml + Binaries: + if: github.repository == 'official-stockfish/Stockfish' + needs: [Matrix, Prerelease, Compilation] + uses: ./.github/workflows/upload_binaries.yml + with: + matrix: ${{ needs.Matrix.outputs.matrix }} + ARM_Binaries: + if: github.repository == 'official-stockfish/Stockfish' + needs: [Matrix, Prerelease, ARMCompilation] + uses: ./.github/workflows/upload_binaries.yml + with: + matrix: ${{ needs.Matrix.outputs.arm_matrix }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..328c9cf9 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,365 @@ +name: Tests +on: + workflow_call: +jobs: + Test-Targets: + name: ${{ matrix.config.name }} + runs-on: ${{ matrix.config.os }} + env: + COMPCXX: ${{ matrix.config.compiler }} + COMP: ${{ matrix.config.comp }} + CXXFLAGS: "-Werror" + strategy: + fail-fast: false + matrix: + config: + - name: Ubuntu 20.04 GCC + os: ubuntu-20.04 + compiler: g++ + comp: gcc + run_32bit_tests: true + run_64bit_tests: true + shell: bash + - name: Ubuntu 20.04 Clang + os: ubuntu-20.04 + compiler: clang++ + comp: clang + run_32bit_tests: true + run_64bit_tests: true + shell: bash + - name: Android NDK aarch64 + os: ubuntu-22.04 + compiler: aarch64-linux-android21-clang++ + comp: ndk + run_armv8_tests: true + shell: bash + - name: Android NDK arm + os: ubuntu-22.04 + compiler: armv7a-linux-androideabi21-clang++ + comp: ndk + run_armv7_tests: true + shell: bash + - name: Linux GCC riscv64 + os: ubuntu-22.04 + compiler: g++ + comp: gcc + run_riscv64_tests: true + base_image: "riscv64/alpine:edge" + platform: linux/riscv64 + shell: bash + - name: Linux GCC ppc64 + os: ubuntu-22.04 + compiler: g++ + comp: gcc + run_ppc64_tests: true + base_image: "ppc64le/alpine:latest" + platform: linux/ppc64le + shell: bash + - name: MacOS 13 Apple Clang + os: macos-13 + compiler: clang++ + comp: clang + run_64bit_tests: true + shell: bash + - name: MacOS 14 Apple Clang M1 + os: macos-14 + compiler: clang++ + comp: clang + run_64bit_tests: false + run_m1_tests: true + shell: bash + - name: MacOS 13 GCC 11 + os: macos-13 + compiler: g++-11 + comp: gcc + run_64bit_tests: true + shell: bash + - name: Windows 2022 Mingw-w64 GCC x86_64 + os: windows-2022 + compiler: g++ + comp: mingw + run_64bit_tests: true + msys_sys: mingw64 + msys_env: x86_64-gcc + shell: msys2 {0} + - name: Windows 2022 Mingw-w64 GCC i686 + os: windows-2022 + compiler: g++ + comp: mingw + run_32bit_tests: true + msys_sys: mingw32 + msys_env: i686-gcc + shell: msys2 {0} + - name: Windows 2022 Mingw-w64 Clang x86_64 + os: windows-2022 + compiler: clang++ + comp: clang + run_64bit_tests: true + msys_sys: clang64 + msys_env: clang-x86_64-clang + shell: msys2 {0} + defaults: + run: + working-directory: src + shell: ${{ matrix.config.shell }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Download required linux packages + if: runner.os == 'Linux' + run: | + sudo apt update + sudo apt install expect valgrind g++-multilib qemu-user-static + + - name: Install NDK + if: runner.os == 'Linux' + run: | + if [ $COMP == ndk ]; then + NDKV="21.4.7075529" + ANDROID_ROOT=/usr/local/lib/android + ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk + SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager + echo "y" | $SDKMANAGER "ndk;$NDKV" + ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV + ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin + echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV + fi + + - name: Set up QEMU + if: matrix.config.base_image + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + if: matrix.config.base_image + uses: docker/setup-buildx-action@v3 + + - name: Build Docker container + if: matrix.config.base_image + run: | + docker buildx build --load -t sf_builder - << EOF + FROM ${{ matrix.config.base_image }} + WORKDIR /app + RUN apk update && apk add make g++ + CMD ["sh", "script.sh"] + EOF + + - name: Download required macOS packages + if: runner.os == 'macOS' + run: brew install coreutils + + - name: Setup msys and install required packages + if: runner.os == 'Windows' + uses: msys2/setup-msys2@v2 + with: + msystem: ${{ matrix.config.msys_sys }} + install: mingw-w64-${{ matrix.config.msys_env }} make git expect + + - name: Download the used network from the fishtest framework + run: make net + + - name: Extract the bench number from the commit history + run: | + for hash in $(git rev-list -100 HEAD); do + benchref=$(git show -s $hash | tac | grep -m 1 -o -x '[[:space:]]*\b[Bb]ench[ :]\+[1-9][0-9]\{5,7\}\b[[:space:]]*' | sed 's/[^0-9]//g') && break || true + done + [[ -n "$benchref" ]] && echo "benchref=$benchref" >> $GITHUB_ENV && echo "From commit: $hash" && echo "Reference bench: $benchref" || echo "No bench found" + + - name: Check compiler + run: | + if [ -z "${{ matrix.config.base_image }}" ]; then + if [ $COMP == ndk ]; then + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + fi + $COMPCXX -v + else + echo "$COMPCXX -v" > script.sh + docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder + fi + + - name: Test help target + run: make help + + - name: Check git + run: git --version + + # x86-32 tests + + - name: Test debug x86-32 build + if: matrix.config.run_32bit_tests + run: | + export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" + make clean + make -j4 ARCH=x86-32 optimize=no debug=yes build + ../tests/signature.sh $benchref + + - name: Test x86-32 build + if: matrix.config.run_32bit_tests + run: | + make clean + make -j4 ARCH=x86-32 build + ../tests/signature.sh $benchref + + - name: Test x86-32-sse41-popcnt build + if: matrix.config.run_32bit_tests + run: | + make clean + make -j4 ARCH=x86-32-sse41-popcnt build + ../tests/signature.sh $benchref + + - name: Test x86-32-sse2 build + if: matrix.config.run_32bit_tests + run: | + make clean + make -j4 ARCH=x86-32-sse2 build + ../tests/signature.sh $benchref + + - name: Test general-32 build + if: matrix.config.run_32bit_tests + run: | + make clean + make -j4 ARCH=general-32 build + ../tests/signature.sh $benchref + + # x86-64 tests + + - name: Test debug x86-64-avx2 build + if: matrix.config.run_64bit_tests + run: | + export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" + make clean + make -j4 ARCH=x86-64-avx2 optimize=no debug=yes build + ../tests/signature.sh $benchref + + - name: Test x86-64-bmi2 build + if: matrix.config.run_64bit_tests + run: | + make clean + make -j4 ARCH=x86-64-bmi2 build + ../tests/signature.sh $benchref + + - name: Test x86-64-avx2 build + if: matrix.config.run_64bit_tests + run: | + make clean + make -j4 ARCH=x86-64-avx2 build + ../tests/signature.sh $benchref + + # Test a deprecated arch + - name: Test x86-64-modern build + if: matrix.config.run_64bit_tests + run: | + make clean + make -j4 ARCH=x86-64-modern build + ../tests/signature.sh $benchref + + - name: Test x86-64-sse41-popcnt build + if: matrix.config.run_64bit_tests + run: | + make clean + make -j4 ARCH=x86-64-sse41-popcnt build + ../tests/signature.sh $benchref + + - name: Test x86-64-ssse3 build + if: matrix.config.run_64bit_tests + run: | + make clean + make -j4 ARCH=x86-64-ssse3 build + ../tests/signature.sh $benchref + + - name: Test x86-64-sse3-popcnt build + if: matrix.config.run_64bit_tests + run: | + make clean + make -j4 ARCH=x86-64-sse3-popcnt build + ../tests/signature.sh $benchref + + - name: Test x86-64 build + if: matrix.config.run_64bit_tests + run: | + make clean + make -j4 ARCH=x86-64 build + ../tests/signature.sh $benchref + + - name: Test general-64 build + if: matrix.config.run_64bit_tests + run: | + make clean + make -j4 ARCH=general-64 build + ../tests/signature.sh $benchref + + - name: Test apple-silicon build + if: matrix.config.run_m1_tests + run: | + make clean + make -j4 ARCH=apple-silicon build + ../tests/signature.sh $benchref + + # armv8 tests + + - name: Test armv8 build + if: matrix.config.run_armv8_tests + run: | + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + export LDFLAGS="-static -Wno-unused-command-line-argument" + make clean + make -j4 ARCH=armv8 build + ../tests/signature.sh $benchref + + - name: Test armv8-dotprod build + if: matrix.config.run_armv8_tests + run: | + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + export LDFLAGS="-static -Wno-unused-command-line-argument" + make clean + make -j4 ARCH=armv8-dotprod build + ../tests/signature.sh $benchref + + # armv7 tests + + - name: Test armv7 build + if: matrix.config.run_armv7_tests + run: | + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + export LDFLAGS="-static -Wno-unused-command-line-argument" + make clean + make -j4 ARCH=armv7 build + ../tests/signature.sh $benchref + + - name: Test armv7-neon build + if: matrix.config.run_armv7_tests + run: | + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + export LDFLAGS="-static -Wno-unused-command-line-argument" + make clean + make -j4 ARCH=armv7-neon build + ../tests/signature.sh $benchref + + # riscv64 tests + + - name: Test riscv64 build + if: matrix.config.run_riscv64_tests + run: | + echo "export LDFLAGS='-static' && make clean && make -j4 ARCH=riscv64 build" > script.sh + docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder + ../tests/signature.sh $benchref + + # ppc64 tests + + - name: Test ppc64 build + if: matrix.config.run_ppc64_tests + run: | + echo "export LDFLAGS='-static' && make clean && make -j4 ARCH=ppc-64 build" > script.sh + docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder + ../tests/signature.sh $benchref + + # Other tests + + - name: Check perft and search reproducibility + if: matrix.config.run_64bit_tests + run: | + make clean + make -j4 ARCH=x86-64-avx2 build + ../tests/perft.sh + ../tests/reprosearch.sh diff --git a/.github/workflows/upload_binaries.yml b/.github/workflows/upload_binaries.yml new file mode 100644 index 00000000..acf91a8f --- /dev/null +++ b/.github/workflows/upload_binaries.yml @@ -0,0 +1,106 @@ +name: Upload Binaries +on: + workflow_call: + inputs: + matrix: + type: string + required: true + +jobs: + Artifacts: + name: ${{ matrix.config.name }} ${{ matrix.binaries }} + runs-on: ${{ matrix.config.os }} + env: + COMPCXX: ${{ matrix.config.compiler }} + COMP: ${{ matrix.config.comp }} + EXT: ${{ matrix.config.ext }} + NAME: ${{ matrix.config.simple_name }} + BINARY: ${{ matrix.binaries }} + SDE: ${{ matrix.config.sde }} + strategy: + fail-fast: false + matrix: ${{ fromJson(inputs.matrix) }} + defaults: + run: + shell: ${{ matrix.config.shell }} + steps: + - uses: actions/checkout@v4 + + - name: Download artifact from compilation + uses: actions/download-artifact@v4 + with: + name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} + path: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} + + - name: Setup msys and install required packages + if: runner.os == 'Windows' + uses: msys2/setup-msys2@v2 + with: + msystem: ${{ matrix.config.msys_sys }} + install: mingw-w64-${{ matrix.config.msys_env }} make git zip + + - name: Create Package + run: | + mkdir stockfish + + - name: Download wiki + run: | + git clone https://github.com/official-stockfish/Stockfish.wiki.git wiki + rm -rf wiki/.git + mv wiki stockfish/ + + - name: Copy files + run: | + mv "${{ matrix.config.simple_name }} ${{ matrix.binaries }}" stockfish-workflow + cd stockfish-workflow + cp -r src ../stockfish/ + cp stockfish-$NAME-$BINARY$EXT ../stockfish/ + cp "Top CPU Contributors.txt" ../stockfish/ + cp Copying.txt ../stockfish/ + cp AUTHORS ../stockfish/ + cp CITATION.cff ../stockfish/ + cp README.md ../stockfish/ + cp CONTRIBUTING.md ../stockfish/ + + - name: Create tar + if: runner.os != 'Windows' + run: | + chmod +x ./stockfish/stockfish-$NAME-$BINARY$EXT + tar -cvf stockfish-$NAME-$BINARY.tar stockfish + + - name: Create zip + if: runner.os == 'Windows' + run: | + zip -r stockfish-$NAME-$BINARY.zip stockfish + + - name: Release + if: startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag' + uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981 + with: + files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} + + - name: Get last commit sha + id: last_commit + run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV + + - name: Get commit date + id: commit_date + run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV + + # Make sure that an old ci that still runs on master doesn't recreate a prerelease + - name: Check Pullable Commits + id: check_commits + run: | + git fetch + CHANGES=$(git rev-list HEAD..origin/master --count) + echo "CHANGES=$CHANGES" >> $GITHUB_ENV + + - name: Prerelease + if: github.ref_name == 'master' && env.CHANGES == '0' + continue-on-error: true + uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981 + with: + name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} + tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} + prerelease: true + files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} diff --git a/AUTHORS b/AUTHORS index f49c1db0..36b2b6f7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,19 +1,18 @@ -# List of authors for Stockfish - -# Founders of the Stockfish project and fishtest infrastructure +# Founders of the Stockfish project and Fishtest infrastructure Tord Romstad (romstad) Marco Costalba (mcostalba) Joona Kiiski (zamar) Gary Linscott (glinscott) -# Authors and inventors of NNUE, training, NNUE port +# Authors and inventors of NNUE, training, and NNUE port Yu Nasu (ynasu87) Motohiro Isozaki (yaneurao) Hisayori Noda (nodchip) -# all other authors of the code in alphabetical order +# All other authors of Stockfish code (in alphabetical order) Aditya (absimaldata) Adrian Petrescu (apetresc) +Ahmed Kerimov (wcdbmv) Ajith Chandy Jose (ajithcj) Alain Savard (Rocky640) Alayan Feh (Alayan-stk-2) @@ -21,6 +20,7 @@ Alexander Kure Alexander Pagel (Lolligerhans) Alfredo Menezes (lonfom169) Ali AlZhrani (Cooffe) +Andreas Matthies (Matthies) Andrei Vetrov (proukornew) Andrew Grant (AndyGrant) Andrey Neporada (nepal) @@ -30,22 +30,33 @@ Aram Tumanian (atumanian) Arjun Temurnikar Artem Solopiy (EntityFX) Auguste Pop +Balazs Szilagyi Balint Pfliegel +Ben Chaney (Chaneybenjamini) Ben Koshy (BKSpurgeon) Bill Henry (VoyagerOne) Bojun Guo (noobpwnftw, Nooby) +borg323 +Boštjan Mejak (PedanticHacker) braich Brian Sheppard (SapphireBrand, briansheppard-toast) Bruno de Melo Costa (BM123499) +Bruno Pellanda (pellanda) Bryan Cross (crossbr) candirufish Chess13234 Chris Cain (ceebo) +Ciekce +clefrks +Clemens L. (rn5f107s2) +Cody Ho (aesrentai) Dale Weiler (graphitemaster) -Dan Schmidt (dfannius) Daniel Axtens (daxtens) Daniel Dugovic (ddugovic) +Daniel Monroe (Ergodice) +Dan Schmidt (dfannius) Dariusz Orzechowski (dorzechowski) +David (dav1312) David Zar Daylen Yang (daylen) Deshawn Mohan-Smith (GoldenRare) @@ -54,55 +65,62 @@ DiscanX Dominik Schlösser (domschl) double-beep Douglas Matos Gomes (dsmsgms) +Dubslow Eduardo Cáceres (eduherminio) Eelco de Groot (KingDefender) Elvin Liu (solarlight2) erbsenzaehler Ernesto Gatti -Linmiao Xu (linrock) Fabian Beuke (madnight) Fabian Fichter (ianfab) Fanael Linithien (Fanael) fanon -Fauzi Akram Dabat (FauziAkram) +Fauzi Akram Dabat (fauzi2) Felix Wittmann gamander +Gabriele Lombardo (gabe) +Gahtan Nahdi Gary Heckman (gheckman) George Sobala (gsobala) gguliash Giacomo Lorenzetti (G-Lorenz) Gian-Carlo Pascutto (gcp) +Goh CJ (cj5716) Gontran Lemaire (gonlem) Goodkov Vasiliy Aleksandrovich (goodkov) Gregor Cramer GuardianRM -Günther Demetz (pb00067, pb00068) Guy Vreuls (gvreuls) +Günther Demetz (pb00067, pb00068) Henri Wiechers Hiraoka Takuya (HiraokaTakuya) homoSapiensSapiens Hongzhi Cheng Ivan Ivec (IIvec) Jacques B. (Timshel) +Jake Senne (w1wwwwww) Jan Ondruš (hxim) -Jared Kish (Kurtbusch) +Jared Kish (Kurtbusch, kurt22i) Jarrod Torriero (DU-jdto) -Jean Gauthier (OuaisBla) +Jasper Shovelton (Beanie496) Jean-Francois Romang (jromang) +Jean Gauthier (OuaisBla) Jekaa Jerry Donald Watson (jerrydonaldwatson) jjoshua2 -Jonathan Calovski (Mysseno) Jonathan Buladas Dumale (SFisGOD) +Jonathan Calovski (Mysseno) +Jonathan McDermid (jonathanmcdermid) Joost VandeVondele (vondele) -Jörg Oster (joergoster) Joseph Ellis (jhellis3) Joseph R. Prostko +Jörg Oster (joergoster) Julian Willemer (NightlyKing) jundery Justin Blanchard (UncombedCoconut) Kelly Wilson Ken Takusagawa +Kian E (KJE-98) kinderchocolate Kiran Panditrao (Krgp) Kojirion @@ -110,6 +128,7 @@ Krystian Kuzniarek (kuzkry) Leonardo Ljubičić (ICCF World Champion) Leonid Pechenik (lp--) Liam Keegan (lkeegan) +Linmiao Xu (linrock) Linus Arver (listx) loco-loco Lub van den Berg (ElbertoOne) @@ -123,6 +142,8 @@ marotear Matt Ginsberg (mattginsberg) Matthew Lai (matthewlai) Matthew Sullivan (Matt14916) +Max A. (Disservin) +Maxim Masiutin (maximmasiutin) Maxim Molchanov (Maxim) Michael An (man) Michael Byrne (MichaelB7) @@ -137,34 +158,40 @@ Mira Miroslav Fontán (Hexik) Moez Jellouli (MJZ1977) Mohammed Li (tthsqe12) +Muzhen J (XInTheDark) Nathan Rugg (nmrugg) -Nick Pelling (nickpelling) +Nguyen Pham (nguyenpham) Nicklas Persson (NicklasPersson) +Nick Pelling (nickpelling) Niklas Fiekas (niklasf) Nikolay Kostov (NikolayIT) -Nguyen Pham (nguyenpham) Norman Schmidt (FireFather) notruck Ofek Shochat (OfekShochat, ghostway) Ondrej Mosnáček (WOnder93) +Ondřej Mišina (AndrovT) Oskar Werkelin Ahlin Pablo Vazquez Panthee Pascal Romaret Pasquale Pigazzini (ppigazzini) Patrick Jansen (mibere) -pellanda +Peter Schneider (pschneider1968) Peter Zsifkovits (CoffeeOne) +PikaCat Praveen Kumar Tummala (praveentml) +Prokop Randáček (ProkopRandacek) Rahul Dsilva (silversolver1) Ralph Stößer (Ralph Stoesser) Raminder Singh renouve -Reuven Peleg -Richard Lloyd +Reuven Peleg (R-Peleg) +Richard Lloyd (Richard-Lloyd) +Robert Nürnberg (robertnurnberg) Rodrigo Exterckötter Tjäder -Ron Britvich (Britvich) +Rodrigo Roim (roim) Ronald de Man (syzygy1, syzygy) +Ron Britvich (Britvich) rqs Rui Coelho (ruicoelhopedro) Ryan Schmitt @@ -175,20 +202,28 @@ Sergei Antonov (saproj) Sergei Ivanov (svivanov72) Sergio Vieri (sergiovieri) sf-x +Shahin M. Shahin (peregrine) Shane Booth (shane31) Shawn Varghese (xXH4CKST3RXx) +Shawn Xu (xu-shawn) Siad Daboul (Topologist) Stefan Geschwentner (locutus2) Stefano Cardanobile (Stefano80) +Stefano Di Martino (StefanoD) Steinar Gunderson (sesse) Stéphane Nicolet (snicolet) -Prokop Randáček (ProkopRandacek) +Stephen Touset (stouset) +Syine Mineta (MinetaS) +Taras Vuk (TarasVuk) Thanar2 thaspel theo77186 +TierynnB +Ting-Hsuan Huang (fffelix-huang) +Tobias Steinmann +Tomasz Sobczyk (Sopel97) Tom Truscott Tom Vijlbrief (tomtor) -Tomasz Sobczyk (Sopel97) Torsten Franz (torfranz, tfranzer) Torsten Hellwig (Torom) Tracey Emery (basepr1me) @@ -196,11 +231,13 @@ tttak Unai Corzo (unaiic) Uri Blass (uriblass) Vince Negri (cuddlestmonkey) +Viren +windfishballad xefoci7612 +Xiang Wang (KatyushaScarlet) zz4032 - # Additionally, we acknowledge the authors and maintainers of fishtest, -# an amazing and essential framework for the development of Stockfish! +# an amazing and essential framework for Stockfish development! # -# https://github.com/glinscott/fishtest/blob/master/AUTHORS +# https://github.com/official-stockfish/fishtest/blob/master/AUTHORS diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..bc0889a8 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,23 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: Stockfish +message: >- + Please cite this software using the metadata from this + file. +type: software +authors: + - name: The Stockfish developers (see AUTHORS file) +repository-code: 'https://github.com/official-stockfish/Stockfish' +url: 'https://stockfishchess.org/' +repository-artifact: 'https://stockfishchess.org/download/' +abstract: Stockfish is a free and strong UCI chess engine. +keywords: + - chess + - artificial intelligence (AI) + - tree search + - alpha-beta search + - neural networks (NN) + - efficiently updatable neural networks (NNUE) +license: GPL-3.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..cf9cecda --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,97 @@ +# Contributing to Stockfish + +Welcome to the Stockfish project! We are excited that you are interested in +contributing. This document outlines the guidelines and steps to follow when +making contributions to Stockfish. + +## Table of Contents + +- [Building Stockfish](#building-stockfish) +- [Making Contributions](#making-contributions) + - [Reporting Issues](#reporting-issues) + - [Submitting Pull Requests](#submitting-pull-requests) +- [Code Style](#code-style) +- [Community and Communication](#community-and-communication) +- [License](#license) + +## Building Stockfish + +In case you do not have a C++ compiler installed, you can follow the +instructions from our wiki. + +- [Ubuntu][ubuntu-compiling-link] +- [Windows][windows-compiling-link] +- [macOS][macos-compiling-link] + +## Making Contributions + +### Reporting Issues + +If you find a bug, please open an issue on the +[issue tracker][issue-tracker-link]. Be sure to include relevant information +like your operating system, build environment, and a detailed description of the +problem. + +_Please note that Stockfish's development is not focused on adding new features. +Thus any issue regarding missing features will potentially be closed without +further discussion._ + +### Submitting Pull Requests + +- Functional changes need to be tested on fishtest. See + [Creating my First Test][creating-my-first-test] for more details. + The accompanying pull request should include a link to the test results and + the new bench. + +- Non-functional changes (e.g. refactoring, code style, documentation) do not + need to be tested on fishtest, unless they might impact performance. + +- Provide a clear and concise description of the changes in the pull request + description. + +_First time contributors should add their name to [AUTHORS](../AUTHORS)._ + +_Stockfish's development is not focused on adding new features. Thus any pull +request introducing new features will potentially be closed without further +discussion._ + +## Code Style + +Changes to Stockfish C++ code should respect our coding style defined by +[.clang-format](.clang-format). You can format your changes by running +`make format`. This requires clang-format version 17 to be installed on your system. + +## Navigate + +For experienced Git users who frequently use git blame, it is recommended to +configure the blame.ignoreRevsFile setting. +This setting is useful for excluding noisy formatting commits. + +```bash +git config blame.ignoreRevsFile .git-blame-ignore-revs +``` + +## Community and Communication + +- Join the [Stockfish discord][discord-link] to discuss ideas, issues, and + development. +- Participate in the [Stockfish GitHub discussions][discussions-link] for + broader conversations. + +## License + +By contributing to Stockfish, you agree that your contributions will be licensed +under the GNU General Public License v3.0. See [Copying.txt][copying-link] for +more details. + +Thank you for contributing to Stockfish and helping us make it even better! + + +[copying-link]: https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt +[discord-link]: https://discord.gg/GWDRS3kU6R +[discussions-link]: https://github.com/official-stockfish/Stockfish/discussions/new +[creating-my-first-test]: https://github.com/official-stockfish/fishtest/wiki/Creating-my-first-test#create-your-test +[issue-tracker-link]: https://github.com/official-stockfish/Stockfish/issues +[ubuntu-compiling-link]: https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler-1 +[windows-compiling-link]: https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler +[macos-compiling-link]: https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler-2 diff --git a/Copying.txt b/Copying.txt index 818433ec..f288702d 100644 --- a/Copying.txt +++ b/Copying.txt @@ -1,674 +1,674 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. diff --git a/README.md b/README.md index f9054f4d..1530264b 100644 --- a/README.md +++ b/README.md @@ -1,333 +1,156 @@ Modified stockfish to play the worst move. Play against the bot at https://lichess.org/@/KateFish +
+ + [![Stockfish][stockfish128-logo]][website-link] + +

Stockfish

+ + A free and strong UCI chess engine. +
+ [Explore Stockfish docs »][wiki-link] +
+
+ [Report bug][issue-link] + · + [Open a discussion][discussions-link] + · + [Discord][discord-link] + · + [Blog][website-blog-link] + + [![Build][build-badge]][build-link] + [![License][license-badge]][license-link] +
+ [![Release][release-badge]][release-link] + [![Commits][commits-badge]][commits-link] +
+ [![Website][website-badge]][website-link] + [![Fishtest][fishtest-badge]][fishtest-link] + [![Discord][discord-badge]][discord-link] + +
+ ## Overview -[![Build Status](https://github.com/official-stockfish/Stockfish/actions/workflows/stockfish.yml/badge.svg)](https://github.com/official-stockfish/Stockfish/actions) -[![Build Status](https://ci.appveyor.com/api/projects/status/github/official-stockfish/Stockfish?branch=master&svg=true)](https://ci.appveyor.com/project/mcostalba/stockfish/branch/master) +[Stockfish][website-link] is a **free and strong UCI chess engine** derived from +Glaurung 2.1 that analyzes chess positions and computes the optimal moves. -[Stockfish](https://stockfishchess.org) is a free, powerful UCI chess engine -derived from Glaurung 2.1. Stockfish is not a complete chess program and requires a -UCI-compatible graphical user interface (GUI) (e.g. XBoard with PolyGlot, Scid, -Cute Chess, eboard, Arena, Sigma Chess, Shredder, Chess Partner or Fritz) in order -to be used comfortably. Read the documentation for your GUI of choice for information -about how to use Stockfish with it. - -The Stockfish engine features two evaluation functions for chess, the classical -evaluation based on handcrafted terms, and the NNUE evaluation based on efficiently -updatable neural networks. The classical evaluation runs efficiently on almost all -CPU architectures, while the NNUE evaluation benefits from the vector -intrinsics available on most CPUs (sse2, avx2, neon, or similar). +Stockfish **does not include a graphical user interface** (GUI) that is required +to display a chessboard and to make it easy to input moves. These GUIs are +developed independently from Stockfish and are available online. **Read the +documentation for your GUI** of choice for information about how to use +Stockfish with it. +See also the Stockfish [documentation][wiki-usage-link] for further usage help. ## Files This distribution of Stockfish consists of the following files: - * [Readme.md](https://github.com/official-stockfish/Stockfish/blob/master/README.md), the file you are currently reading. + * [README.md][readme-link], the file you are currently reading. - * [Copying.txt](https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt), a text file containing the GNU General Public License version 3. + * [Copying.txt][license-link], a text file containing the GNU General Public + License version 3. - * [AUTHORS](https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS), a text file with the list of authors for the project + * [AUTHORS][authors-link], a text file with the list of authors for the project. - * [src](https://github.com/official-stockfish/Stockfish/tree/master/src), a subdirectory containing the full source code, including a Makefile - that can be used to compile Stockfish on Unix-like systems. + * [src][src-link], a subdirectory containing the full source code, including a + Makefile that can be used to compile Stockfish on Unix-like systems. * a file with the .nnue extension, storing the neural network for the NNUE evaluation. Binary distributions will have this file embedded. -## The UCI protocol and available options +## Contributing -The Universal Chess Interface (UCI) is a standard protocol used to communicate with -a chess engine, and is the recommended way to do so for typical graphical user interfaces -(GUI) or chess tools. Stockfish implements the majority of it options as described -in [the UCI protocol](https://www.shredderchess.com/download/div/uci.zip). - -Developers can see the default values for UCI options available in Stockfish by typing -`./stockfish uci` in a terminal, but the majority of users will typically see them and -change them via a chess GUI. This is a list of available UCI options in Stockfish: - - * #### Threads - The number of CPU threads used for searching a position. For best performance, set - this equal to the number of CPU cores available. - - * #### Hash - The size of the hash table in MB. It is recommended to set Hash after setting Threads. - - * #### Clear Hash - Clear the hash table. - - * #### Ponder - Let Stockfish ponder its next move while the opponent is thinking. - - * #### MultiPV - Output the N best lines (principal variations, PVs) when searching. - Leave at 1 for best performance. - - * #### Use NNUE - Toggle between the NNUE and classical evaluation functions. If set to "true", - the network parameters must be available to load from file (see also EvalFile), - if they are not embedded in the binary. - - * #### EvalFile - The name of the file of the NNUE evaluation parameters. Depending on the GUI the - filename might have to include the full path to the folder/directory that contains the file. - Other locations, such as the directory that contains the binary and the working directory, - are also searched. - - * #### UCI_AnalyseMode - An option handled by your GUI. - - * #### UCI_Chess960 - An option handled by your GUI. If true, Stockfish will play Chess960. - - * #### UCI_ShowWDL - If enabled, show approximate WDL statistics as part of the engine output. - These WDL numbers model expected game outcomes for a given evaluation and - game ply for engine self-play at fishtest LTC conditions (60+0.6s per game). - - * #### UCI_LimitStrength - Enable weaker play aiming for an Elo rating as set by UCI_Elo. This option overrides Skill Level. - - * #### UCI_Elo - If enabled by UCI_LimitStrength, aim for an engine strength of the given Elo. - This Elo rating has been calibrated at a time control of 60s+0.6s and anchored to CCRL 40/4. - - * #### Skill Level - Lower the Skill Level in order to make Stockfish play weaker (see also UCI_LimitStrength). - Internally, MultiPV is enabled, and with a certain probability depending on the Skill Level a - weaker move will be played. - - * #### SyzygyPath - Path to the folders/directories storing the Syzygy tablebase files. Multiple - directories are to be separated by ";" on Windows and by ":" on Unix-based - operating systems. Do not use spaces around the ";" or ":". - - Example: `C:\tablebases\wdl345;C:\tablebases\wdl6;D:\tablebases\dtz345;D:\tablebases\dtz6` - - It is recommended to store .rtbw files on an SSD. There is no loss in storing - the .rtbz files on a regular HD. It is recommended to verify all md5 checksums - of the downloaded tablebase files (`md5sum -c checksum.md5`) as corruption will - lead to engine crashes. - - * #### SyzygyProbeDepth - Minimum remaining search depth for which a position is probed. Set this option - to a higher value to probe less aggressively if you experience too much slowdown - (in terms of nps) due to tablebase probing. - - * #### Syzygy50MoveRule - Disable to let fifty-move rule draws detected by Syzygy tablebase probes count - as wins or losses. This is useful for ICCF correspondence games. - - * #### SyzygyProbeLimit - Limit Syzygy tablebase probing to positions with at most this many pieces left - (including kings and pawns). - - * #### Move Overhead - Assume a time delay of x ms due to network and GUI overheads. This is useful to - avoid losses on time in those cases. - - * #### Slow Mover - Lower values will make Stockfish take less time in games, higher values will - make it think longer. - - * #### nodestime - Tells the engine to use nodes searched instead of wall time to account for - elapsed time. Useful for engine testing. - - * #### Debug Log File - Write all communication to and from the engine into a text file. - -For developers the following non-standard commands might be of interest, mainly useful for debugging: - - * #### bench *ttSize threads limit fenFile limitType evalType* - Performs a standard benchmark using various options. The signature of a version (standard node - count) is obtained using all defaults. `bench` is currently `bench 16 1 13 default depth mixed`. - - * #### compiler - Give information about the compiler and environment used for building a binary. - - * #### d - Display the current position, with ascii art and fen. - - * #### eval - Return the evaluation of the current position. - - * #### export_net [filename] - Exports the currently loaded network to a file. - If the currently loaded network is the embedded network and the filename - is not specified then the network is saved to the file matching the name - of the embedded network, as defined in evaluate.h. - If the currently loaded network is not the embedded network (some net set - through the UCI setoption) then the filename parameter is required and the - network is saved into that file. - - * #### flip - Flips the side to move. - - -## A note on classical evaluation versus NNUE evaluation - -Both approaches assign a value to a position that is used in alpha-beta (PVS) search -to find the best move. The classical evaluation computes this value as a function -of various chess concepts, handcrafted by experts, tested and tuned using fishtest. -The NNUE evaluation computes this value with a neural network based on basic -inputs (e.g. piece positions only). The network is optimized and trained -on the evaluations of millions of positions at moderate search depth. - -The NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward. -It can be evaluated efficiently on CPUs, and exploits the fact that only parts -of the neural network need to be updated after a typical chess move. -[The nodchip repository](https://github.com/nodchip/Stockfish) provided the first version of -the needed tools to train and develop the NNUE networks. Today, more advanced training tools are available -in [the nnue-pytorch repository](https://github.com/glinscott/nnue-pytorch/), while data generation tools -are available in [a dedicated branch](https://github.com/official-stockfish/Stockfish/tree/tools). - -On CPUs supporting modern vector instructions -(avx2 and similar), the NNUE evaluation results in much stronger playing strength, even -if the nodes per second computed by the engine is somewhat lower (roughly 80% of nps -is typical). - -Notes: - -1) the NNUE evaluation depends on the Stockfish binary and the network parameter -file (see the EvalFile UCI option). Not every parameter file is compatible with a given -Stockfish binary, but the default value of the EvalFile UCI option is the name of a network -that is guaranteed to be compatible with that binary. - -2) to use the NNUE evaluation, the additional data file with neural network parameters -needs to be available. Normally, this file is already embedded in the binary or it -can be downloaded. The filename for the default (recommended) net can be found as the default -value of the `EvalFile` UCI option, with the format `nn-[SHA256 first 12 digits].nnue` -(for instance, `nn-c157e0a5755b.nnue`). This file can be downloaded from -``` -https://tests.stockfishchess.org/api/nn/[filename] -``` -replacing `[filename]` as needed. - -## What to expect from the Syzygy tablebases? - -If the engine is searching a position that is not in the tablebases (e.g. -a position with 8 pieces), it will access the tablebases during the search. -If the engine reports a very large score (typically 153.xx), this means -it has found a winning line into a tablebase position. - -If the engine is given a position to search that is in the tablebases, it -will use the tablebases at the beginning of the search to preselect all -good moves, i.e. all moves that preserve the win or preserve the draw while -taking into account the 50-move rule. -It will then perform a search only on those moves. **The engine will not move -immediately**, unless there is only a single good move. **The engine likely -will not report a mate score, even if the position is known to be won.** - -It is therefore clear that this behaviour is not identical to what one might -be used to with Nalimov tablebases. There are technical reasons for this -difference, the main technical reason being that Nalimov tablebases use the -DTM metric (distance-to-mate), while the Syzygy tablebases use a variation of the -DTZ metric (distance-to-zero, zero meaning any move that resets the 50-move -counter). This special metric is one of the reasons that the Syzygy tablebases are -more compact than Nalimov tablebases, while still storing all information -needed for optimal play and in addition being able to take into account -the 50-move rule. - -## Large Pages - -Stockfish supports large pages on Linux and Windows. Large pages make -the hash access more efficient, improving the engine speed, especially -on large hash sizes. Typical increases are 5..10% in terms of nodes per -second, but speed increases up to 30% have been measured. The support is -automatic. Stockfish attempts to use large pages when available and -will fall back to regular memory allocation when this is not the case. - -### Support on Linux - -Large page support on Linux is obtained by the Linux kernel -transparent huge pages functionality. Typically, transparent huge pages -are already enabled, and no configuration is needed. - -### Support on Windows - -The use of large pages requires "Lock Pages in Memory" privilege. See -[Enable the Lock Pages in Memory Option (Windows)](https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows) -on how to enable this privilege, then run [RAMMap](https://docs.microsoft.com/en-us/sysinternals/downloads/rammap) -to double-check that large pages are used. We suggest that you reboot -your computer after you have enabled large pages, because long Windows -sessions suffer from memory fragmentation, which may prevent Stockfish -from getting large pages: a fresh session is better in this regard. - -## Compiling Stockfish yourself from the sources - -Stockfish has support for 32 or 64-bit CPUs, certain hardware -instructions, big-endian machines such as Power PC, and other platforms. - -On Unix-like systems, it should be easy to compile Stockfish -directly from the source code with the included Makefile in the folder -`src`. In general it is recommended to run `make help` to see a list of make -targets with corresponding descriptions. - -``` - cd src - make help - make net - make build ARCH=x86-64-modern -``` - -When not using the Makefile to compile (for instance, with Microsoft MSVC) you -need to manually set/unset some switches in the compiler command line; see -file *types.h* for a quick reference. - -When reporting an issue or a bug, please tell us which Stockfish version -and which compiler you used to create your executable. This information -can be found by typing the following command in a console: - -``` - ./stockfish compiler -``` - -## Understanding the code base and participating in the project - -Stockfish's improvement over the last decade has been a great community -effort. There are a few ways to help contribute to its growth. +__See [Contributing Guide](CONTRIBUTING.md).__ ### Donating hardware -Improving Stockfish requires a massive amount of testing. You can donate -your hardware resources by installing the [Fishtest Worker](https://github.com/glinscott/fishtest/wiki/Running-the-worker:-overview) -and view the current tests on [Fishtest](https://tests.stockfishchess.org/tests). +Improving Stockfish requires a massive amount of testing. You can donate your +hardware resources by installing the [Fishtest Worker][worker-link] and viewing +the current tests on [Fishtest][fishtest-link]. ### Improving the code -If you want to help improve the code, there are several valuable resources: - -* [In this wiki,](https://www.chessprogramming.org) many techniques used in +In the [chessprogramming wiki][programming-link], many techniques used in Stockfish are explained with a lot of background information. +The [section on Stockfish][programmingsf-link] describes many features +and techniques used by Stockfish. However, it is generic rather than +focused on Stockfish's precise implementation. -* [The section on Stockfish](https://www.chessprogramming.org/Stockfish) -describes many features and techniques used by Stockfish. However, it is -generic rather than being focused on Stockfish's precise implementation. -Nevertheless, a helpful resource. - -* The latest source can always be found on [GitHub](https://github.com/official-stockfish/Stockfish). -Discussions about Stockfish take place these days mainly in the [FishCooking](https://groups.google.com/forum/#!forum/fishcooking) -group and on the [Stockfish Discord channel](https://discord.gg/nv8gDtt). -The engine testing is done on [Fishtest](https://tests.stockfishchess.org/tests). -If you want to help improve Stockfish, please read this [guideline](https://github.com/glinscott/fishtest/wiki/Creating-my-first-test) +The engine testing is done on [Fishtest][fishtest-link]. +If you want to help improve Stockfish, please read this [guideline][guideline-link] first, where the basics of Stockfish development are explained. +Discussions about Stockfish take place these days mainly in the Stockfish +[Discord server][discord-link]. This is also the best place to ask questions +about the codebase and how to improve it. + +## Compiling Stockfish + +Stockfish has support for 32 or 64-bit CPUs, certain hardware instructions, +big-endian machines such as Power PC, and other platforms. + +On Unix-like systems, it should be easy to compile Stockfish directly from the +source code with the included Makefile in the folder `src`. In general, it is +recommended to run `make help` to see a list of make targets with corresponding +descriptions. An example suitable for most Intel and AMD chips: + +``` +cd src +make -j profile-build ARCH=x86-64-avx2 +``` + +Detailed compilation instructions for all platforms can be found in our +[documentation][wiki-compile-link]. Our wiki also has information about +the [UCI commands][wiki-uci-link] supported by Stockfish. ## Terms of use -Stockfish is free, and distributed under the **GNU General Public License version 3** -(GPL v3). Essentially, this means you are free to do almost exactly -what you want with the program, including distributing it among your -friends, making it available for download from your website, selling -it (either by itself or as part of some bigger software package), or -using it as the starting point for a software project of your own. +Stockfish is free and distributed under the +[**GNU General Public License version 3**][license-link] (GPL v3). Essentially, +this means you are free to do almost exactly what you want with the program, +including distributing it among your friends, making it available for download +from your website, selling it (either by itself or as part of some bigger +software package), or using it as the starting point for a software project of +your own. -The only real limitation is that whenever you distribute Stockfish in -some way, you MUST always include the full source code, or a pointer -to where the source code can be found, to generate the exact binary -you are distributing. If you make any changes to the source code, -these changes must also be made available under the GPL. +The only real limitation is that whenever you distribute Stockfish in some way, +you MUST always include the license and the full source code (or a pointer to +where the source code can be found) to generate the exact binary you are +distributing. If you make any changes to the source code, these changes must +also be made available under GPL v3. -For full details, read the copy of the GPL v3 found in the file named -[*Copying.txt*](https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt). + +[authors-link]: https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS +[build-link]: https://github.com/official-stockfish/Stockfish/actions/workflows/stockfish.yml +[commits-link]: https://github.com/official-stockfish/Stockfish/commits/master +[discord-link]: https://discord.gg/GWDRS3kU6R +[issue-link]: https://github.com/official-stockfish/Stockfish/issues/new?assignees=&labels=&template=BUG-REPORT.yml +[discussions-link]: https://github.com/official-stockfish/Stockfish/discussions/new +[fishtest-link]: https://tests.stockfishchess.org/tests +[guideline-link]: https://github.com/official-stockfish/fishtest/wiki/Creating-my-first-test +[license-link]: https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt +[programming-link]: https://www.chessprogramming.org/Main_Page +[programmingsf-link]: https://www.chessprogramming.org/Stockfish +[readme-link]: https://github.com/official-stockfish/Stockfish/blob/master/README.md +[release-link]: https://github.com/official-stockfish/Stockfish/releases/latest +[src-link]: https://github.com/official-stockfish/Stockfish/tree/master/src +[stockfish128-logo]: https://stockfishchess.org/images/logo/icon_128x128.png +[uci-link]: https://backscattering.de/chess/uci/ +[website-link]: https://stockfishchess.org +[website-blog-link]: https://stockfishchess.org/blog/ +[wiki-link]: https://github.com/official-stockfish/Stockfish/wiki +[wiki-compile-link]: https://github.com/official-stockfish/Stockfish/wiki/Compiling-from-source +[wiki-uci-link]: https://github.com/official-stockfish/Stockfish/wiki/UCI-&-Commands +[wiki-usage-link]: https://github.com/official-stockfish/Stockfish/wiki/Download-and-usage +[worker-link]: https://github.com/official-stockfish/fishtest/wiki/Running-the-worker + +[build-badge]: https://img.shields.io/github/actions/workflow/status/official-stockfish/Stockfish/stockfish.yml?branch=master&style=for-the-badge&label=stockfish&logo=github +[commits-badge]: https://img.shields.io/github/commits-since/official-stockfish/Stockfish/latest?style=for-the-badge +[discord-badge]: https://img.shields.io/discord/435943710472011776?style=for-the-badge&label=discord&logo=Discord +[fishtest-badge]: https://img.shields.io/website?style=for-the-badge&down_color=red&down_message=Offline&label=Fishtest&up_color=success&up_message=Online&url=https%3A%2F%2Ftests.stockfishchess.org%2Ftests%2Ffinished +[license-badge]: https://img.shields.io/github/license/official-stockfish/Stockfish?style=for-the-badge&label=license&color=success +[release-badge]: https://img.shields.io/github/v/release/official-stockfish/Stockfish?style=for-the-badge&label=official%20release +[website-badge]: https://img.shields.io/website?style=for-the-badge&down_color=red&down_message=Offline&label=website&up_color=success&up_message=Online&url=https%3A%2F%2Fstockfishchess.org diff --git a/Top CPU Contributors.txt b/Top CPU Contributors.txt index 4bc96cde..11636e84 100644 --- a/Top CPU Contributors.txt +++ b/Top CPU Contributors.txt @@ -1,100 +1,126 @@ -Contributors to Fishtest with >10,000 CPU hours, as of 2022-02-05. +Contributors to Fishtest with >10,000 CPU hours, as of 2024-02-24. Thank you! Username CPU Hours Games played ------------------------------------------------------------------ -noobpwnftw 30730952 2158431735 -mlang 2729669 187335452 -technologov 1696847 74478658 -dew 1635640 97483012 -grandphish2 1062754 64955639 -tvijlbrief 795993 51894442 -okrout 773704 63465204 -TueRens 766198 47770388 +noobpwnftw 39302472 3055513453 +technologov 20845762 994893444 +linrock 8616428 560281417 +mlang 3026000 200065824 +okrout 2332151 222639518 +pemo 1800019 60274069 +dew 1689162 100033738 +TueRens 1474943 75121774 +grandphish2 1463002 91616949 +JojoM 1109702 72927902 +olafm 978631 71037944 +sebastronomy 939955 44920556 +tvijlbrief 796125 51897690 +gvreuls 711320 49142318 mibere 703840 46867607 -JojoM 703005 42689868 -pemo 634102 29868807 -linrock 626939 17408017 -gvreuls 517442 33605006 -cw 503905 33850487 -fastgm 482847 29004732 -crunchy 427035 27344275 -CSU_Dynasty 415864 28116776 -ctoks 403102 26737127 -oz 357710 26490208 -bcross 331095 23165889 +oz 646268 46293638 +rpngn 572571 38928563 +leszek 531858 39316505 +cw 518116 34894291 +fastgm 503862 30260818 +CSU_Dynasty 468784 31385034 +ctoks 434591 28520597 +maximmasiutin 429983 27066286 +crunchy 427414 27371625 +bcross 415724 29061187 +velislav 342588 22140902 +mgrabiak 338763 23999170 Fisherman 327231 21829379 -velislav 321708 20729264 -leszek 303654 19063973 -Dantist 251015 15843226 -mgrabiak 231973 15162494 -glinscott 217799 13780820 -robal 213960 13665726 -nordlandia 211692 13484886 -drabel 200914 13755384 +robal 299836 20213182 +Dantist 296386 18031762 +ncfish1 267604 17881149 +nordlandia 249322 16420192 +marrco 234581 17714473 +tolkki963 233490 19773930 +glinscott 208125 13277240 +drabel 204167 13930674 +mhoram 202894 12601997 bking_US 198894 11876016 -mhoram 180229 11610075 +Calis007 188631 12795784 Thanar 179852 12365359 +Fifis 176209 10638245 vdv 175544 9904472 spams 157128 10319326 -marrco 150300 9402229 +DesolatedDodo 156659 10210328 +armo9494 155355 10566898 sqrt2 147963 9724586 -vdbergh 137429 8955089 +jcAEie 140086 10603658 +vdbergh 139746 9172061 CoffeeOne 137100 5024116 malala 136182 8002293 xoto 133759 9159372 -rpngn 131285 8657757 -davar 122661 7996937 +davar 129023 8376525 +DMBK 122960 8980062 dsmith 122059 7570238 -amicic 119659 7937885 +javran 121564 10144656 +amicic 119661 7938029 +sschnee 118107 7389266 +Wolfgang 114616 8070494 Data 113305 8220352 BrunoBanani 112960 7436849 -CypressChess 108321 7759588 +Wencey 111502 5991676 +cuistot 108503 7006992 +CypressChess 108331 7759788 +skiminki 107583 7218170 MaZePallas 102823 6633619 sterni1971 100532 5880772 sunu 100167 7040199 -DesolatedDodo 99038 6414626 +zeryl 99331 6221261 +thirdlife 99156 2245320 ElbertoOne 99028 7023771 -skiminki 98123 6478402 +Dubslow 98600 6903242 +markkulix 97010 7643900 +bigpen0r 94809 6529203 brabos 92118 6186135 -cuistot 90358 5351004 +Maxim 90818 3283364 psk 89957 5984901 -racerschmacer 85712 6119648 +megaman7de 88822 6052132 +racerschmacer 85805 6122790 +maposora 85710 7778146 Vizvezdenec 83761 5344740 -sschnee 83003 4840890 0x3C33 82614 5271253 BRAVONE 81239 5054681 nssy 76497 5259388 +jromang 76106 5236025 teddybaer 75125 5407666 Pking_cda 73776 5293873 -zeryl 73335 4774257 -jromang 72192 5057715 +yurikvelo 73516 5036928 +MarcusTullius 71053 4803477 +Bobo1239 70579 4794999 solarlight 70517 5028306 dv8silencer 70287 3883992 -Bobo1239 68515 4652287 +Spprtr 69646 4806763 +Mineta 66325 4537742 manap 66273 4121774 +szupaw 65468 5669742 tinker 64333 4268790 -yurikvelo 63371 4335060 qurashee 61208 3429862 +woutboat 59496 4906352 +AGI 58195 4329580 robnjr 57262 4053117 -Wolfgang 57014 3561352 Freja 56938 3733019 +MaxKlaxxMiner 56879 3423958 ttruscott 56010 3680085 rkl 55132 4164467 +jmdana 54697 4012593 renouve 53811 3501516 +notchris 52433 4044590 finfish 51360 3370515 eva42 51272 3599691 -Calis007 51182 3131552 -eastorwest 51058 3451555 +eastorwest 51117 3454811 +Goatminola 51004 4432492 rap 49985 3219146 -pb00067 49727 3298270 -Spprtr 48260 3141959 -bigpen0r 47667 3336927 +pb00067 49733 3298934 +GPUex 48686 3684998 +OuaisBla 48626 3445134 ronaldjerum 47654 3240695 -MaxKlaxxMiner 47584 2972142 biffhero 46564 3111352 -megaman7de 45992 2952006 -Fifis 45843 3088497 +oryx 45533 3539290 VoyagerOne 45476 3452465 speedycpu 43842 3003273 jbwiebe 43305 2805433 @@ -102,129 +128,159 @@ Antihistamine 41788 2761312 mhunt 41735 2691355 homyur 39893 2850481 gri 39871 2515779 -oryx 38867 2976992 +Garf 37741 2999686 SC 37299 2731694 -Garf 37213 2986270 +Sylvain27 36520 1467082 csnodgrass 36207 2688994 -jmdana 36157 2210661 +Gaster319 35655 3149442 strelock 34716 2074055 EthanOConnor 33370 2090311 slakovv 32915 2021889 -armo9494 32129 2551682 -tolkki963 32114 1932256 +gopeto 31884 2076712 +Gelma 31771 1551204 +kdave 31157 2198362 manapbk 30987 1810399 -DMBK 30675 2383552 +ZacHFX 30551 2238078 Prcuvu 30377 2170122 anst 30301 2190091 jkiiski 30136 1904470 -gopeto 29886 1979118 +spcc 29925 1901692 hyperbolic.tom 29840 2017394 chuckstablers 29659 2093438 Pyafue 29650 1902349 -OuaisBla 27636 1578800 +belzedar94 28846 1811530 +votoanthuan 27978 2285818 +shawnxu 27438 2465810 chriswk 26902 1868317 +xwziegtm 26897 2124586 achambord 26582 1767323 Patrick_G 26276 1801617 yorkman 26193 1992080 +Ulysses 25397 1701264 +Jopo12321 25227 1652482 SFTUser 25182 1675689 -nabildanial 24942 1519409 +nabildanial 25068 1531665 Sharaf_DG 24765 1786697 -ncfish1 24411 1520927 -rodneyc 24275 1410450 +rodneyc 24376 1416402 +jsys14 24297 1721230 agg177 23890 1395014 -belzedar94 23707 1593860 +srowen 23842 1342508 +Ente 23752 1678188 +jojo2357 23479 2061238 JanErik 23408 1703875 Isidor 23388 1680691 -Norabor 23339 1602636 -Ente 23093 1642458 -cisco2015 22897 1762669 +Norabor 23371 1603244 +cisco2015 22920 1763301 Zirie 22542 1472937 +Nullvalue 22490 1970374 +AndreasKrug 22485 1769491 team-oh 22272 1636708 +Roady 22220 1465606 MazeOfGalious 21978 1629593 -sg4032 21947 1643265 +sg4032 21947 1643353 ianh2105 21725 1632562 xor12 21628 1680365 dex 21612 1467203 nesoneg 21494 1463031 +user213718 21454 1404128 sphinx 21211 1384728 +qoo_charly_cai 21135 1514907 jjoshua2 21001 1423089 +Zake9298 20938 1565848 horst.prack 20878 1465656 -user213718 20783 1379584 0xB00B1ES 20590 1208666 +Serpensin 20487 1729674 +Dinde 20440 1292390 j3corre 20405 941444 Adrian.Schmidt123 20316 1281436 wei 19973 1745989 -Roady 19848 1335928 +fishtester 19617 1257388 rstoesser 19569 1293588 eudhan 19274 1283717 vulcan 18871 1729392 +Karpovbot 18766 1053178 +WoodMan777 18556 1628264 jundery 18445 1115855 -iisiraider 18247 1101015 ville 17883 1384026 chris 17698 1487385 purplefishies 17595 1092533 -dju 17353 978595 -kdave 17183 1242754 +dju 17414 981289 +ols 17291 1042003 +iisiraider 17275 1049015 +Skiff84 17111 950248 DragonLord 17014 1162790 -thirdlife 16996 447356 -spcc 16932 1130940 -fishtester 16644 1123000 -Ulysses 16490 1184400 +redstone59 16842 1461780 +Karby 16839 1010124 +Alb11747 16787 1213990 +pirt 16493 1237199 +Naven94 16414 951718 +wizardassassin 16392 1148672 IgorLeMasson 16064 1147232 +scuzzi 15757 968735 ako027ako 15671 1173203 Nikolay.IT 15154 1068349 Andrew Grant 15114 895539 OssumOpossum 14857 1007129 -Karby 14808 867120 -AndreasKrug 14608 1152093 +LunaticBFF57 14525 1190310 enedene 14476 905279 -jsys14 14340 844792 -bpfliegel 14298 884523 +IslandLambda 14393 958196 +bpfliegel 14233 882523 +YELNAMRON 14230 1128094 mpx86 14019 759568 jpulman 13982 870599 +getraideBFF 13871 1172846 +Nesa92 13806 1116101 crocogoat 13803 1117422 -joster 13794 950160 -Nesa92 13786 1114691 +joster 13710 946160 mbeier 13650 1044928 Hjax 13535 915487 Dark_wizzie 13422 1007152 Rudolphous 13244 883140 -MarcusTullius 13221 843169 Machariel 13010 863104 +infinigon 12991 943216 mabichito 12903 749391 thijsk 12886 722107 AdrianSA 12860 804972 -infinigon 12807 937332 Flopzee 12698 894821 +mschmidt 12644 863193 +korposzczur 12606 838168 +tsim67 12570 890180 +Jackfish 12553 836958 fatmurphy 12547 853210 -scuzzi 12511 845761 +Oakwen 12503 853105 SapphireBrand 12416 969604 +deflectooor 12386 579392 modolief 12386 896470 +TataneSan 12358 609332 Farseer 12249 694108 pgontarz 12151 848794 +dbernier 12103 860824 +FormazChar 11989 907809 stocky 11954 699440 -mschmidt 11941 803401 -dbernier 11609 818636 -Maxim 11543 836024 -pirt 11516 894513 +somethingintheshadows 11940 989472 +MooTheCow 11892 776126 +3cho 11842 1036786 +whelanh 11557 245188 infinity 11470 727027 -aga 11409 695071 +aga 11412 695127 torbjo 11395 729145 Thomas A. Anderson 11372 732094 savage84 11358 670860 -markkulix 11331 739098 -FormazChar 11308 847735 d64 11263 789184 -MooTheCow 11237 720174 +ali-al-zhrani 11245 779246 +ckaz 11170 680866 snicolet 11106 869170 -ali-al-zhrani 11098 768494 -whelanh 11067 235676 +dapper 11032 771402 +Ethnikoi 10993 945906 +Snuuka 10938 435504 +Karmatron 10859 678058 basepi 10637 744851 +jibarbosa 10628 857100 Cubox 10621 826448 +mecevdimitar 10609 787318 michaelrpg 10509 739239 +Def9Infinity 10427 686978 OIVAS7572 10420 995586 +wxt9861 10412 1013864 +Garruk 10365 706465 dzjp 10343 732529 -Garruk 10332 703905 -ols 10259 570669 -lbraesch 10252 647825 -Jackfish 10098 682338 diff --git a/scripts/get_native_properties.sh b/scripts/get_native_properties.sh new file mode 100755 index 00000000..fb124021 --- /dev/null +++ b/scripts/get_native_properties.sh @@ -0,0 +1,120 @@ +#!/bin/sh + +# +# Returns properties of the native system. +# best architecture as supported by the CPU +# filename of the best binary uploaded as an artifact during CI +# + +# Check if all the given flags are present in the CPU flags list +check_flags() { + for flag; do + printf '%s\n' "$flags" | grep -q -w "$flag" || return 1 + done +} + +# Set the CPU flags list +# remove underscores and points from flags, e.g. gcc uses avx512vnni, while some cpuinfo can have avx512_vnni, some systems use sse4_1 others sse4.1 +get_flags() { + flags=$(awk '/^flags[ \t]*:|^Features[ \t]*:/{gsub(/^flags[ \t]*:[ \t]*|^Features[ \t]*:[ \t]*|[_.]/, ""); line=$0} END{print line}' /proc/cpuinfo) +} + +# Check for gcc march "znver1" or "znver2" https://en.wikichip.org/wiki/amd/cpuid +check_znver_1_2() { + vendor_id=$(awk '/^vendor_id/{print $3; exit}' /proc/cpuinfo) + cpu_family=$(awk '/^cpu family/{print $4; exit}' /proc/cpuinfo) + [ "$vendor_id" = "AuthenticAMD" ] && [ "$cpu_family" = "23" ] && znver_1_2=true +} + +# Set the file CPU x86_64 architecture +set_arch_x86_64() { + if check_flags 'avx512vnni' 'avx512dq' 'avx512f' 'avx512bw' 'avx512vl'; then + true_arch='x86-64-vnni256' + elif check_flags 'avx512f' 'avx512bw'; then + true_arch='x86-64-avx512' + elif [ -z "${znver_1_2+1}" ] && check_flags 'bmi2'; then + true_arch='x86-64-bmi2' + elif check_flags 'avx2'; then + true_arch='x86-64-avx2' + elif check_flags 'sse41' && check_flags 'popcnt'; then + true_arch='x86-64-sse41-popcnt' + else + true_arch='x86-64' + fi +} + +# Check the system type +uname_s=$(uname -s) +uname_m=$(uname -m) +case $uname_s in + 'Darwin') # Mac OSX system + case $uname_m in + 'arm64') + true_arch='apple-silicon' + file_arch='x86-64-sse41-popcnt' # Supported by Rosetta 2 + ;; + 'x86_64') + flags=$(sysctl -n machdep.cpu.features machdep.cpu.leaf7_features | tr '\n' ' ' | tr '[:upper:]' '[:lower:]' | tr -d '_.') + set_arch_x86_64 + if [ "$true_arch" = 'x86-64-vnni256' ] || [ "$true_arch" = 'x86-64-avx512' ]; then + file_arch='x86-64-bmi2' + fi + ;; + esac + file_os='macos' + file_ext='tar' + ;; + 'Linux') # Linux system + get_flags + case $uname_m in + 'x86_64') + file_os='ubuntu' + check_znver_1_2 + set_arch_x86_64 + ;; + 'i686') + file_os='ubuntu' + true_arch='x86-32' + ;; + 'aarch64') + file_os='android' + true_arch='armv8' + if check_flags 'asimddp'; then + true_arch="$true_arch-dotprod" + fi + ;; + 'armv7'*) + file_os='android' + true_arch='armv7' + if check_flags 'neon'; then + true_arch="$true_arch-neon" + fi + ;; + *) # Unsupported machine type, exit with error + printf 'Unsupported machine type: %s\n' "$uname_m" + exit 1 + ;; + esac + file_ext='tar' + ;; + 'CYGWIN'*|'MINGW'*|'MSYS'*) # Windows system with POSIX compatibility layer + get_flags + check_znver_1_2 + set_arch_x86_64 + file_os='windows' + file_ext='zip' + ;; + *) + # Unknown system type, exit with error + printf 'Unsupported system type: %s\n' "$uname_s" + exit 1 + ;; +esac + +if [ -z "$file_arch" ]; then + file_arch=$true_arch +fi + +file_name="stockfish-$file_os-$file_arch.$file_ext" + +printf '%s %s\n' "$true_arch" "$file_name" diff --git a/src/Makefile b/src/Makefile index eff8baca..45f38b01 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,5 @@ # Stockfish, a UCI chess playing engine derived from Glaurung 2.1 -# Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) +# Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) # # Stockfish is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,9 +20,9 @@ ### ========================================================================== ### Establish the operating system name -KERNEL = $(shell uname -s) +KERNEL := $(shell uname -s) ifeq ($(KERNEL),Linux) - OS = $(shell uname -o) + OS := $(shell uname -o) endif ### Target Windows OS @@ -33,7 +33,7 @@ ifeq ($(OS),Windows_NT) else ifeq ($(COMP),mingw) target_windows = yes ifeq ($(WINE_PATH),) - WINE_PATH = $(shell which wine) + WINE_PATH := $(shell which wine) endif endif @@ -49,17 +49,21 @@ PREFIX = /usr/local BINDIR = $(PREFIX)/bin ### Built-in benchmark for pgo-builds -ifeq ($(SDE_PATH),) - PGOBENCH = $(WINE_PATH) ./$(EXE) bench -else - PGOBENCH = $(SDE_PATH) -- $(WINE_PATH) ./$(EXE) bench -endif +PGOBENCH = $(WINE_PATH) ./$(EXE) bench ### Source and object files -SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ - material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \ +SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \ + misc.cpp movegen.cpp movepick.cpp position.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ - nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp + nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp + +HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \ + nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \ + nnue/layers/affine_transform_sparse_input.h nnue/layers/clipped_relu.h nnue/layers/simd.h \ + nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \ + nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \ + search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \ + tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h OBJS = $(notdir $(SRCS:.cpp=.o)) @@ -69,32 +73,33 @@ VPATH = syzygy:nnue:nnue/features ### Section 2. High-level Configuration ### ========================================================================== # -# flag --- Comp switch --- Description +# flag --- Comp switch --- Description # ---------------------------------------------------------------------------- # -# debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode +# debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode # sanitize = none/ ... (-fsanitize ) -# --- ( undefined ) --- enable undefined behavior checks -# --- ( thread ) --- enable threading error checks -# --- ( address ) --- enable memory access checks -# --- ...etc... --- see compiler documentation for supported sanitizers -# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations -# arch = (name) --- (-arch) --- Target architecture -# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system -# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction -# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction -# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction -# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions -# mmx = yes/no --- -mmmx --- Use Intel MMX instructions -# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2 -# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 -# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 -# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 -# avxvnni = yes/no --- -mavxvnni --- Use Intel Vector Neural Network Instructions AVX -# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 -# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256 -# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 -# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture +# --- ( undefined ) --- enable undefined behavior checks +# --- ( thread ) --- enable threading error checks +# --- ( address ) --- enable memory access checks +# --- ...etc... --- see compiler documentation for supported sanitizers +# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations +# arch = (name) --- (-arch) --- Target architecture +# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system +# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction +# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction +# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction +# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions +# mmx = yes/no --- -mmmx --- Use Intel MMX instructions +# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2 +# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 +# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 +# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 +# avxvnni = yes/no --- -mavxvnni --- Use Intel Vector Neural Network Instructions AVX +# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 +# vnni256 = yes/no --- -mavx256vnni --- Use Intel Vector Neural Network Instructions 512 with 256bit operands +# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 +# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture +# dotprod = yes/no --- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions # # Note that Makefile is space sensitive, so when adding new architectures # or modifying existing flags, you have to make sure there are no extra spaces @@ -107,16 +112,20 @@ VPATH = syzygy:nnue:nnue/features ### 2.1. General and architecture defaults ifeq ($(ARCH),) - ARCH = x86-64-modern - help_skip_sanity = yes + ARCH = native endif + +ifeq ($(ARCH), native) + override ARCH := $(shell $(SHELL) ../scripts/get_native_properties.sh | cut -d " " -f 1) +endif + # explicitly check for the list of supported architectures (as listed with make help), # the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true` ifeq ($(ARCH), $(filter $(ARCH), \ x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-avxvnni x86-64-bmi2 \ x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 e2k \ - armv7 armv7-neon armv8 apple-silicon general-64 general-32)) + armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64 loongarch64)) SUPPORTED_ARCH=true else SUPPORTED_ARCH=false @@ -140,9 +149,16 @@ avx512 = no vnni256 = no vnni512 = no neon = no +dotprod = no arm_version = 0 STRIP = strip +ifneq ($(shell which clang-format-17 2> /dev/null),) + CLANG-FORMAT = clang-format-17 +else + CLANG-FORMAT = clang-format +endif + ### 2.2 Architecture specific ifeq ($(findstring x86,$(ARCH)),x86) @@ -191,6 +207,8 @@ ifeq ($(findstring -sse41,$(ARCH)),-sse41) endif ifeq ($(findstring -modern,$(ARCH)),-modern) + $(warning *** ARCH=$(ARCH) is deprecated, defaulting to ARCH=x86-64-sse41-popcnt. Execute `make help` for a list of available architectures. ***) + $(shell sleep 5) popcnt = yes sse = yes sse2 = yes @@ -308,11 +326,21 @@ ifeq ($(ARCH),armv8) arm_version = 8 endif +ifeq ($(ARCH),armv8-dotprod) + arch = armv8 + prefetch = yes + popcnt = yes + neon = yes + dotprod = yes + arm_version = 8 +endif + ifeq ($(ARCH),apple-silicon) arch = arm64 prefetch = yes popcnt = yes neon = yes + dotprod = yes arm_version = 8 endif @@ -338,16 +366,30 @@ ifeq ($(findstring e2k,$(ARCH)),e2k) popcnt = yes endif +ifeq ($(ARCH),riscv64) + arch = riscv64 endif +ifeq ($(ARCH),loongarch64) + arch = loongarch64 +endif +endif + + ### ========================================================================== ### Section 3. Low-level Configuration ### ========================================================================== ### 3.1 Selecting compiler (default = gcc) -CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) -DEPENDFLAGS += -std=c++17 -LDFLAGS += $(EXTRALDFLAGS) +ifeq ($(MAKELEVEL),0) + export ENV_CXXFLAGS := $(CXXFLAGS) + export ENV_DEPENDFLAGS := $(DEPENDFLAGS) + export ENV_LDFLAGS := $(LDFLAGS) +endif + +CXXFLAGS = $(ENV_CXXFLAGS) -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) +DEPENDFLAGS = $(ENV_DEPENDFLAGS) -std=c++17 +LDFLAGS = $(ENV_LDFLAGS) $(EXTRALDFLAGS) ifeq ($(COMP),) COMP=gcc @@ -356,13 +398,18 @@ endif ifeq ($(COMP),gcc) comp=gcc CXX=g++ - CXXFLAGS += -pedantic -Wextra -Wshadow + CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations - ifeq ($(arch),$(filter $(arch),armv7 armv8)) + ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64)) ifeq ($(OS),Android) CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) endif + ifeq ($(ARCH),riscv64) + CXXFLAGS += -latomic + endif + else ifeq ($(ARCH),loongarch64) + CXXFLAGS += -latomic else CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) @@ -397,13 +444,14 @@ ifeq ($(COMP),mingw) CXX=i686-w64-mingw32-c++-posix endif endif - CXXFLAGS += -pedantic -Wextra -Wshadow + CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations endif -ifeq ($(COMP),icc) - comp=icc - CXX=icpc - CXXFLAGS += -diag-disable 1476,10120 -Wcheck -Wabi -Wdeprecated -strict-ansi +ifeq ($(COMP),icx) + comp=icx + CXX=icpx + CXXFLAGS += --intel -pedantic -Wextra -Wshadow -Wmissing-prototypes \ + -Wconditional-uninitialized -Wabi -Wdeprecated endif ifeq ($(COMP),clang) @@ -413,7 +461,8 @@ ifeq ($(COMP),clang) CXX=x86_64-w64-mingw32-clang++ endif - CXXFLAGS += -pedantic -Wextra -Wshadow + CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-prototypes \ + -Wconditional-uninitialized ifeq ($(filter $(KERNEL),Darwin OpenBSD FreeBSD),) ifeq ($(target_windows),) @@ -423,11 +472,16 @@ ifeq ($(COMP),clang) endif endif - ifeq ($(arch),$(filter $(arch),armv7 armv8)) + ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64)) ifeq ($(OS),Android) CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) endif + ifeq ($(ARCH),riscv64) + CXXFLAGS += -latomic + endif + else ifeq ($(ARCH),loongarch64) + CXXFLAGS += -latomic else CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) @@ -467,12 +521,20 @@ ifeq ($(COMP),ndk) STRIP=llvm-strip endif endif + ifeq ($(arch),x86_64) + CXX=x86_64-linux-android21-clang++ + ifneq ($(shell which x86_64-linux-android-strip 2>/dev/null),) + STRIP=x86_64-linux-android-strip + else + STRIP=llvm-strip + endif + endif LDFLAGS += -static-libstdc++ -pie -lm -latomic endif -ifeq ($(comp),icc) - profile_make = icc-profile-make - profile_use = icc-profile-use +ifeq ($(comp),icx) + profile_make = icx-profile-make + profile_use = icx-profile-use else ifeq ($(comp),clang) profile_make = clang-profile-make profile_use = clang-profile-use @@ -484,11 +546,6 @@ else endif endif -### Travis CI script uses COMPILER to overwrite CXX -ifdef COMPILER - COMPCXX=$(COMPILER) -endif - ### Allow overwriting CXX from command line ifdef COMPCXX CXX=$(COMPCXX) @@ -496,8 +553,8 @@ endif ### Sometimes gcc is really clang ifeq ($(COMP),gcc) - gccversion = $(shell $(CXX) --version) - gccisclang = $(findstring clang,$(gccversion)) + gccversion := $(shell $(CXX) --version 2>/dev/null) + gccisclang := $(findstring clang,$(gccversion)) ifneq ($(gccisclang),) profile_make = clang-profile-make profile_use = clang-profile-use @@ -534,7 +591,7 @@ endif ### 3.3 Optimization ifeq ($(optimize),yes) - CXXFLAGS += -O3 + CXXFLAGS += -O3 -funroll-loops ifeq ($(comp),gcc) ifeq ($(OS), Android) @@ -542,20 +599,23 @@ ifeq ($(optimize),yes) endif endif - ifeq ($(KERNEL),Darwin) - ifeq ($(comp),$(filter $(comp),clang icc)) - CXXFLAGS += -mdynamic-no-pic - endif + ifeq ($(KERNEL),Darwin) + ifeq ($(comp),$(filter $(comp),clang icx)) + CXXFLAGS += -mdynamic-no-pic + endif - ifeq ($(comp),gcc) - ifneq ($(arch),arm64) - CXXFLAGS += -mdynamic-no-pic - endif - endif - endif + ifeq ($(comp),gcc) + ifneq ($(arch),arm64) + CXXFLAGS += -mdynamic-no-pic + endif + endif + endif ifeq ($(comp),clang) - CXXFLAGS += -fexperimental-new-pass-manager + clangmajorversion := $(shell $(CXX) -dumpversion 2>/dev/null | cut -f1 -d.) + ifeq ($(shell expr $(clangmajorversion) \< 16),1) + CXXFLAGS += -fexperimental-new-pass-manager + endif endif endif @@ -576,8 +636,6 @@ endif ifeq ($(popcnt),yes) ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) CXXFLAGS += -DUSE_POPCNT - else ifeq ($(comp),icc) - CXXFLAGS += -msse3 -DUSE_POPCNT else CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT endif @@ -586,63 +644,62 @@ endif ### 3.6 SIMD architectures ifeq ($(avx2),yes) CXXFLAGS += -DUSE_AVX2 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -mavx2 + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) + CXXFLAGS += -mavx2 -mbmi endif endif ifeq ($(avxvnni),yes) CXXFLAGS += -DUSE_VNNI -DUSE_AVXVNNI - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mavxvnni endif endif ifeq ($(avx512),yes) CXXFLAGS += -DUSE_AVX512 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mavx512f -mavx512bw endif endif ifeq ($(vnni256),yes) CXXFLAGS += -DUSE_VNNI - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256 endif endif ifeq ($(vnni512),yes) CXXFLAGS += -DUSE_VNNI - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) + CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=512 endif endif ifeq ($(sse41),yes) CXXFLAGS += -DUSE_SSE41 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -msse4.1 endif endif ifeq ($(ssse3),yes) CXXFLAGS += -DUSE_SSSE3 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mssse3 endif endif ifeq ($(sse2),yes) CXXFLAGS += -DUSE_SSE2 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -msse2 endif endif ifeq ($(mmx),yes) - CXXFLAGS += -DUSE_MMX - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mmmx endif endif @@ -658,21 +715,45 @@ ifeq ($(neon),yes) endif endif +ifeq ($(dotprod),yes) + CXXFLAGS += -march=armv8.2-a+dotprod -DUSE_NEON_DOTPROD +endif + ### 3.7 pext ifeq ($(pext),yes) CXXFLAGS += -DUSE_PEXT - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mbmi2 endif endif -### 3.8 Link Time Optimization +### 3.8.1 Try to include git commit sha for versioning +GIT_SHA := $(shell git rev-parse HEAD 2>/dev/null | cut -c 1-8) +ifneq ($(GIT_SHA), ) + CXXFLAGS += -DGIT_SHA=$(GIT_SHA) +endif + +### 3.8.2 Try to include git commit date for versioning +GIT_DATE := $(shell git show -s --date=format:'%Y%m%d' --format=%cd HEAD 2>/dev/null) +ifneq ($(GIT_DATE), ) + CXXFLAGS += -DGIT_DATE=$(GIT_DATE) +endif + +### 3.8.3 Try to include architecture +ifneq ($(ARCH), ) + CXXFLAGS += -DARCH=$(ARCH) +endif + +### 3.9 Link Time Optimization ### This is a mix of compile and link time options because the lto link phase ### needs access to the optimization flags. ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(comp),clang) - CXXFLAGS += -flto + ifeq ($(comp),$(filter $(comp),clang icx)) + CXXFLAGS += -flto=full + ifeq ($(comp),icx) + CXXFLAGS += -fwhole-program-vtables + endif ifeq ($(target_windows),yes) CXXFLAGS += -fuse-ld=lld endif @@ -682,25 +763,23 @@ ifeq ($(debug), no) # GCC on some systems. else ifeq ($(comp),gcc) ifeq ($(gccisclang),) - CXXFLAGS += -flto + CXXFLAGS += -flto -flto-partition=one LDFLAGS += $(CXXFLAGS) -flto=jobserver else - CXXFLAGS += -flto + CXXFLAGS += -flto=full LDFLAGS += $(CXXFLAGS) endif # To use LTO and static linking on Windows, # the tool chain requires gcc version 10.1 or later. else ifeq ($(comp),mingw) - ifneq ($(arch),i386) - CXXFLAGS += -flto + CXXFLAGS += -flto -flto-partition=one LDFLAGS += $(CXXFLAGS) -save-temps endif - endif endif endif -### 3.9 Android 5 can only run position independent executables. Note that this +### 3.10 Android 5 can only run position independent executables. Note that this ### breaks Android 4.0 and earlier. ifeq ($(OS), Android) CXXFLAGS += -fPIE @@ -711,81 +790,87 @@ endif ### Section 4. Public Targets ### ========================================================================== - help: @echo "" @echo "To compile stockfish, type: " @echo "" - @echo "make target ARCH=arch [COMP=compiler] [COMPCXX=cxx]" + @echo "make -j target [ARCH=arch] [COMP=compiler] [COMPCXX=cxx]" @echo "" @echo "Supported targets:" @echo "" @echo "help > Display architecture details" - @echo "build > Standard build" - @echo "net > Download the default nnue net" - @echo "profile-build > Faster build (with profile-guided optimization)" + @echo "profile-build > standard build with profile-guided optimization" + @echo "build > skip profile-guided optimization" + @echo "net > Download the default nnue nets" @echo "strip > Strip executable" @echo "install > Install executable" @echo "clean > Clean up" @echo "" @echo "Supported archs:" @echo "" - @echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide" - @echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide" + @echo "native > select the best architecture for the host processor (default)" + @echo "x86-64-vnni512 > x86 64-bit with vnni 512bit support" + @echo "x86-64-vnni256 > x86 64-bit with vnni 512bit support, limit operands to 256bit wide" @echo "x86-64-avx512 > x86 64-bit with avx512 support" - @echo "x86-64-avxvnni > x86 64-bit with avxvnni support" + @echo "x86-64-avxvnni > x86 64-bit with vnni 256bit support" @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support" @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" - @echo "x86-64-modern > common modern CPU, currently x86-64-sse41-popcnt" + @echo "x86-64-modern > deprecated, currently x86-64-sse41-popcnt" @echo "x86-64-ssse3 > x86 64-bit with ssse3 support" - @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" + @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 compile and popcnt support" @echo "x86-64 > x86 64-bit generic (with sse2 support)" @echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" @echo "x86-32-sse2 > x86 32-bit with sse2 support" - @echo "x86-32 > x86 32-bit generic (with mmx and sse support)" + @echo "x86-32 > x86 32-bit generic (with mmx compile support)" @echo "ppc-64 > PPC 64-bit" @echo "ppc-32 > PPC 32-bit" @echo "armv7 > ARMv7 32-bit" @echo "armv7-neon > ARMv7 32-bit with popcnt and neon" @echo "armv8 > ARMv8 64-bit with popcnt and neon" + @echo "armv8-dotprod > ARMv8 64-bit with popcnt, neon and dot product support" @echo "e2k > Elbrus 2000" @echo "apple-silicon > Apple silicon ARM64" @echo "general-64 > unspecified 64-bit" @echo "general-32 > unspecified 32-bit" + @echo "riscv64 > RISC-V 64-bit" + @echo "loongarch64 > LoongArch 64-bit" @echo "" @echo "Supported compilers:" @echo "" - @echo "gcc > Gnu compiler (default)" - @echo "mingw > Gnu compiler with MinGW under Windows" + @echo "gcc > GNU compiler (default)" + @echo "mingw > GNU compiler with MinGW under Windows" @echo "clang > LLVM Clang compiler" - @echo "icc > Intel compiler" + @echo "icx > Intel oneAPI DPC++/C++ Compiler" @echo "ndk > Google NDK to cross-compile for Android" @echo "" - @echo "Simple examples. If you don't know what to do, you likely want to run: " + @echo "Simple examples. If you don't know what to do, you likely want to run one of: " @echo "" - @echo "make -j build ARCH=x86-64 (A portable, slow compile for 64-bit systems)" - @echo "make -j build ARCH=x86-32 (A portable, slow compile for 32-bit systems)" + @echo "make -j profile-build ARCH=x86-64-avx2 # typically a fast compile for common systems " + @echo "make -j profile-build ARCH=x86-64-sse41-popcnt # A more portable compile for 64-bit systems " + @echo "make -j profile-build ARCH=x86-64 # A portable compile for 64-bit systems " @echo "" - @echo "Advanced examples, for experienced users looking for performance: " + @echo "Advanced examples, for experienced users: " @echo "" - @echo "make help ARCH=x86-64-bmi2" - @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0" + @echo "make -j profile-build ARCH=x86-64-avxvnni" + @echo "make -j profile-build ARCH=x86-64-avxvnni COMP=gcc COMPCXX=g++-12.0" @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" - @echo "-------------------------------" -ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true) - @echo "The selected architecture $(ARCH) will enable the following configuration: " - @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity -else +ifneq ($(SUPPORTED_ARCH), true) @echo "Specify a supported architecture with the ARCH option for more details" @echo "" endif -.PHONY: help build profile-build strip install clean net objclean profileclean \ - config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \ - clang-profile-use clang-profile-make +.PHONY: help analyze build profile-build strip install clean net \ + objclean profileclean config-sanity \ + icx-profile-use icx-profile-make \ + gcc-profile-use gcc-profile-make \ + clang-profile-use clang-profile-make FORCE \ + format analyze + +analyze: net config-sanity objclean + $(MAKE) -k ARCH=$(ARCH) COMP=$(COMP) $(OBJS) build: net config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all @@ -796,7 +881,8 @@ profile-build: net config-sanity objclean profileclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) @echo "" @echo "Step 2/4. Running benchmark for pgo-build ..." - $(PGOBENCH) 2>&1 | tail -n 4 + $(PGOBENCH) > PGOBENCH.out 2>&1 + tail -n 4 PGOBENCH.out @echo "" @echo "Step 3/4. Building optimized executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean @@ -817,30 +903,6 @@ install: clean: objclean profileclean @rm -f .depend *~ core -# evaluation network (nnue) -net: - $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) - @echo "Default net: $(nnuenet)" - $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) - $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) - @if test -f "$(nnuenet)"; then \ - echo "Already available."; \ - else \ - if [ "x$(curl_or_wget)" = "x" ]; then \ - echo "Automatic download failed: neither curl nor wget is installed. Install one of these tools or download the net manually"; exit 1; \ - else \ - echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet);\ - fi; \ - fi; - $(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi)) - @if [ "x$(shasum_command)" != "x" ]; then \ - if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ - echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; \ - fi \ - else \ - echo "shasum / sha256sum not found, skipping net validation"; \ - fi - # clean binaries and objects objclean: @rm -f stockfish stockfish.exe *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o @@ -848,13 +910,71 @@ objclean: # clean auxiliary profiling files profileclean: @rm -rf profdir - @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s + @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s PGOBENCH.out @rm -f stockfish.profdata *.profraw @rm -f stockfish.*args* @rm -f stockfish.*lt* @rm -f stockfish.res @rm -f ./-lstdc++.res +define fetch_network + @echo "Default net: $(nnuenet)" + @if [ "x$(curl_or_wget)" = "x" ]; then \ + echo "Neither curl nor wget is installed. Install one of these tools unless the net has been downloaded manually"; \ + fi + @if [ "x$(shasum_command)" = "x" ]; then \ + echo "shasum / sha256sum not found, skipping net validation"; \ + elif test -f "$(nnuenet)"; then \ + if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ + echo "Removing invalid network"; rm -f $(nnuenet); \ + fi; \ + fi; + @for nnuedownloadurl in "$(nnuedownloadurl1)" "$(nnuedownloadurl2)"; do \ + if test -f "$(nnuenet)"; then \ + echo "$(nnuenet) available : OK"; break; \ + else \ + if [ "x$(curl_or_wget)" != "x" ]; then \ + echo "Downloading $${nnuedownloadurl}"; $(curl_or_wget) $${nnuedownloadurl} > $(nnuenet);\ + else \ + echo "No net found and download not possible"; exit 1;\ + fi; \ + fi; \ + if [ "x$(shasum_command)" != "x" ]; then \ + if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ + echo "Removing failed download"; rm -f $(nnuenet); \ + fi; \ + fi; \ + done + @if ! test -f "$(nnuenet)"; then \ + echo "Failed to download $(nnuenet)."; \ + fi; + @if [ "x$(shasum_command)" != "x" ]; then \ + if [ "$(nnuenet)" = "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ + echo "Network validated"; break; \ + fi; \ + fi; +endef + +# set up shell variables for the net stuff +define netvariables +$(eval nnuenet := $(shell grep $(1) evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) +$(eval nnuedownloadurl1 := https://tests.stockfishchess.org/api/nn/$(nnuenet)) +$(eval nnuedownloadurl2 := https://github.com/official-stockfish/networks/raw/master/$(nnuenet)) +$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) +$(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi)) +endef + +# evaluation network (nnue) +net: + $(call netvariables, EvalFileDefaultNameBig) + $(call fetch_network) + $(call netvariables, EvalFileDefaultNameSmall) + $(call fetch_network) + +format: + $(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file + +# default target default: help @@ -888,7 +1008,9 @@ config-sanity: net @echo "vnni256: '$(vnni256)'" @echo "vnni512: '$(vnni512)'" @echo "neon: '$(neon)'" + @echo "dotprod: '$(dotprod)'" @echo "arm_version: '$(arm_version)'" + @echo "target_windows: '$(target_windows)'" @echo "" @echo "Flags:" @echo "CXX: $(CXX)" @@ -902,7 +1024,7 @@ config-sanity: net @test "$(SUPPORTED_ARCH)" = "true" @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \ - test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" + test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" || test "$(arch)" = "riscv64" || test "$(arch)" = "loongarch64" @test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" @@ -917,12 +1039,16 @@ config-sanity: net @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no" - @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ + @test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ || test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" $(EXE): $(OBJS) +$(CXX) -o $@ $(OBJS) $(LDFLAGS) +# Force recompilation to ensure version info is up-to-date +misc.o: FORCE +FORCE: + clang-profile-make: $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ EXTRACXXFLAGS='-fprofile-instr-generate ' \ @@ -951,18 +1077,22 @@ gcc-profile-use: EXTRALDFLAGS='-lgcov' \ all -icc-profile-make: - @mkdir -p profdir +icx-profile-make: $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-prof-gen=srcpos -prof_dir ./profdir' \ + EXTRACXXFLAGS='-fprofile-instr-generate ' \ + EXTRALDFLAGS=' -fprofile-instr-generate' \ all -icc-profile-use: +icx-profile-use: + $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \ + EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ + EXTRALDFLAGS='-fprofile-use ' \ all .depend: $(SRCS) -@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null +ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net objclean profileclean config-sanity)) -include .depend +endif diff --git a/src/benchmark.cpp b/src/benchmark.cpp index e1c025ad..3622ac8a 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,18 +16,17 @@ along with this program. If not, see . */ +#include "benchmark.h" + +#include #include #include -#include #include -#include "position.h" - -using namespace std; - namespace { -const vector Defaults = { +// clang-format off +const std::vector Defaults = { "setoption name UCI_Chess960 value false", "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1", "r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 10", @@ -90,86 +89,75 @@ const vector Defaults = { "nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1", "setoption name UCI_Chess960 value false" }; +// clang-format on -} // namespace +} // namespace -namespace Stockfish { +namespace Stockfish::Benchmark { -/// setup_bench() builds a list of UCI commands to be run by bench. There -/// are five parameters: TT size in MB, number of search threads that -/// should be used, the limit value spent for each position, a file name -/// where to look for positions in FEN format, the type of the limit: -/// depth, perft, nodes and movetime (in millisecs), and evaluation type -/// mixed (default), classical, NNUE. -/// -/// bench -> search default positions up to depth 13 -/// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB) -/// bench 64 4 5000 current movetime -> search current position with 4 threads for 5 sec -/// bench 64 1 100000 default nodes -> search default positions for 100K nodes each -/// bench 16 1 5 default perft -> run a perft 5 on default positions +// Builds a list of UCI commands to be run by bench. There +// are five parameters: TT size in MB, number of search threads that +// should be used, the limit value spent for each position, a file name +// where to look for positions in FEN format, and the type of the limit: +// depth, perft, nodes and movetime (in milliseconds). Examples: +// +// bench : search default positions up to depth 13 +// bench 64 1 15 : search default positions up to depth 15 (TT = 64MB) +// bench 64 1 100000 default nodes : search default positions for 100K nodes each +// bench 64 4 5000 current movetime : search current position with 4 threads for 5 sec +// bench 16 1 5 blah perft : run a perft 5 on positions in file "blah" +std::vector setup_bench(const std::string& currentFen, std::istream& is) { -vector setup_bench(const Position& current, istream& is) { + std::vector fens, list; + std::string go, token; - vector fens, list; - string go, token; + // Assign default values to missing arguments + std::string ttSize = (is >> token) ? token : "16"; + std::string threads = (is >> token) ? token : "1"; + std::string limit = (is >> token) ? token : "13"; + std::string fenFile = (is >> token) ? token : "default"; + std::string limitType = (is >> token) ? token : "depth"; - // Assign default values to missing arguments - string ttSize = (is >> token) ? token : "16"; - string threads = (is >> token) ? token : "1"; - string limit = (is >> token) ? token : "13"; - string fenFile = (is >> token) ? token : "default"; - string limitType = (is >> token) ? token : "depth"; - string evalType = (is >> token) ? token : "mixed"; + go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit; - go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit; + if (fenFile == "default") + fens = Defaults; - if (fenFile == "default") - fens = Defaults; + else if (fenFile == "current") + fens.push_back(currentFen); - else if (fenFile == "current") - fens.push_back(current.fen()); + else + { + std::string fen; + std::ifstream file(fenFile); - else - { - string fen; - ifstream file(fenFile); + if (!file.is_open()) + { + std::cerr << "Unable to open file " << fenFile << std::endl; + exit(EXIT_FAILURE); + } - if (!file.is_open()) - { - cerr << "Unable to open file " << fenFile << endl; - exit(EXIT_FAILURE); - } + while (getline(file, fen)) + if (!fen.empty()) + fens.push_back(fen); - while (getline(file, fen)) - if (!fen.empty()) - fens.push_back(fen); + file.close(); + } - file.close(); - } + list.emplace_back("setoption name Threads value " + threads); + list.emplace_back("setoption name Hash value " + ttSize); + list.emplace_back("ucinewgame"); - list.emplace_back("setoption name Threads value " + threads); - list.emplace_back("setoption name Hash value " + ttSize); - list.emplace_back("ucinewgame"); + for (const std::string& fen : fens) + if (fen.find("setoption") != std::string::npos) + list.emplace_back(fen); + else + { + list.emplace_back("position fen " + fen); + list.emplace_back(go); + } - size_t posCounter = 0; - - for (const string& fen : fens) - if (fen.find("setoption") != string::npos) - list.emplace_back(fen); - else - { - if (evalType == "classical" || (evalType == "mixed" && posCounter % 2 == 0)) - list.emplace_back("setoption name Use NNUE value false"); - else if (evalType == "NNUE" || (evalType == "mixed" && posCounter % 2 != 0)) - list.emplace_back("setoption name Use NNUE value true"); - list.emplace_back("position fen " + fen); - list.emplace_back(go); - ++posCounter; - } - - list.emplace_back("setoption name Use NNUE value true"); - - return list; + return list; } -} // namespace Stockfish +} // namespace Stockfish \ No newline at end of file diff --git a/src/psqt.h b/src/benchmark.h similarity index 67% rename from src/psqt.h rename to src/benchmark.h index 4ee0e379..b1eba40f 100644 --- a/src/psqt.h +++ b/src/benchmark.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,23 +16,17 @@ along with this program. If not, see . */ +#ifndef BENCHMARK_H_INCLUDED +#define BENCHMARK_H_INCLUDED -#ifndef PSQT_H_INCLUDED -#define PSQT_H_INCLUDED +#include +#include +#include +namespace Stockfish::Benchmark { -#include "types.h" +std::vector setup_bench(const std::string&, std::istream&); +} // namespace Stockfish -namespace Stockfish::PSQT -{ - -extern Score psq[PIECE_NB][SQUARE_NB]; - -// Fill psqt array from a set of internally linked parameters -extern void init(); - -} // namespace Stockfish::PSQT - - -#endif // PSQT_H_INCLUDED +#endif // #ifndef BENCHMARK_H_INCLUDED diff --git a/src/bitbase.cpp b/src/bitbase.cpp deleted file mode 100644 index 84300baf..00000000 --- a/src/bitbase.cpp +++ /dev/null @@ -1,172 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include -#include -#include - -#include "bitboard.h" -#include "types.h" - -namespace Stockfish { - -namespace { - - // There are 24 possible pawn squares: files A to D and ranks from 2 to 7. - // Positions with the pawn on files E to H will be mirrored before probing. - constexpr unsigned MAX_INDEX = 2*24*64*64; // stm * psq * wksq * bksq = 196608 - - std::bitset KPKBitbase; - - // A KPK bitbase index is an integer in [0, IndexMax] range - // - // Information is mapped in a way that minimizes the number of iterations: - // - // bit 0- 5: white king square (from SQ_A1 to SQ_H8) - // bit 6-11: black king square (from SQ_A1 to SQ_H8) - // bit 12: side to move (WHITE or BLACK) - // bit 13-14: white pawn file (from FILE_A to FILE_D) - // bit 15-17: white pawn RANK_7 - rank (from RANK_7 - RANK_7 to RANK_7 - RANK_2) - unsigned index(Color stm, Square bksq, Square wksq, Square psq) { - return int(wksq) | (bksq << 6) | (stm << 12) | (file_of(psq) << 13) | ((RANK_7 - rank_of(psq)) << 15); - } - - enum Result { - INVALID = 0, - UNKNOWN = 1, - DRAW = 2, - WIN = 4 - }; - - Result& operator|=(Result& r, Result v) { return r = Result(r | v); } - - struct KPKPosition { - KPKPosition() = default; - explicit KPKPosition(unsigned idx); - operator Result() const { return result; } - Result classify(const std::vector& db); - - Color stm; - Square ksq[COLOR_NB], psq; - Result result; - }; - -} // namespace - -bool Bitbases::probe(Square wksq, Square wpsq, Square bksq, Color stm) { - - assert(file_of(wpsq) <= FILE_D); - - return KPKBitbase[index(stm, bksq, wksq, wpsq)]; -} - - -void Bitbases::init() { - - std::vector db(MAX_INDEX); - unsigned idx, repeat = 1; - - // Initialize db with known win / draw positions - for (idx = 0; idx < MAX_INDEX; ++idx) - db[idx] = KPKPosition(idx); - - // Iterate through the positions until none of the unknown positions can be - // changed to either wins or draws (15 cycles needed). - while (repeat) - for (repeat = idx = 0; idx < MAX_INDEX; ++idx) - repeat |= (db[idx] == UNKNOWN && db[idx].classify(db) != UNKNOWN); - - // Fill the bitbase with the decisive results - for (idx = 0; idx < MAX_INDEX; ++idx) - if (db[idx] == WIN) - KPKBitbase.set(idx); -} - -namespace { - - KPKPosition::KPKPosition(unsigned idx) { - - ksq[WHITE] = Square((idx >> 0) & 0x3F); - ksq[BLACK] = Square((idx >> 6) & 0x3F); - stm = Color ((idx >> 12) & 0x01); - psq = make_square(File((idx >> 13) & 0x3), Rank(RANK_7 - ((idx >> 15) & 0x7))); - - // Invalid if two pieces are on the same square or if a king can be captured - if ( distance(ksq[WHITE], ksq[BLACK]) <= 1 - || ksq[WHITE] == psq - || ksq[BLACK] == psq - || (stm == WHITE && (pawn_attacks_bb(WHITE, psq) & ksq[BLACK]))) - result = INVALID; - - // Win if the pawn can be promoted without getting captured - else if ( stm == WHITE - && rank_of(psq) == RANK_7 - && ksq[WHITE] != psq + NORTH - && ( distance(ksq[BLACK], psq + NORTH) > 1 - || (distance(ksq[WHITE], psq + NORTH) == 1))) - result = WIN; - - // Draw if it is stalemate or the black king can capture the pawn - else if ( stm == BLACK - && ( !(attacks_bb(ksq[BLACK]) & ~(attacks_bb(ksq[WHITE]) | pawn_attacks_bb(WHITE, psq))) - || (attacks_bb(ksq[BLACK]) & ~attacks_bb(ksq[WHITE]) & psq))) - result = DRAW; - - // Position will be classified later - else - result = UNKNOWN; - } - - Result KPKPosition::classify(const std::vector& db) { - - // White to move: If one move leads to a position classified as WIN, the result - // of the current position is WIN. If all moves lead to positions classified - // as DRAW, the current position is classified as DRAW, otherwise the current - // position is classified as UNKNOWN. - // - // Black to move: If one move leads to a position classified as DRAW, the result - // of the current position is DRAW. If all moves lead to positions classified - // as WIN, the position is classified as WIN, otherwise the current position is - // classified as UNKNOWN. - const Result Good = (stm == WHITE ? WIN : DRAW); - const Result Bad = (stm == WHITE ? DRAW : WIN); - - Result r = INVALID; - Bitboard b = attacks_bb(ksq[stm]); - - while (b) - r |= stm == WHITE ? db[index(BLACK, ksq[BLACK], pop_lsb(b), psq)] - : db[index(WHITE, pop_lsb(b), ksq[WHITE], psq)]; - - if (stm == WHITE) - { - if (rank_of(psq) < RANK_7) // Single push - r |= db[index(BLACK, ksq[BLACK], ksq[WHITE], psq + NORTH)]; - - if ( rank_of(psq) == RANK_2 // Double push - && psq + NORTH != ksq[WHITE] - && psq + NORTH != ksq[BLACK]) - r |= db[index(BLACK, ksq[BLACK], ksq[WHITE], psq + NORTH + NORTH)]; - } - - return result = r & Good ? Good : r & UNKNOWN ? UNKNOWN : Bad; - } - -} // namespace - -} // namespace Stockfish diff --git a/src/bitboard.cpp b/src/bitboard.cpp index fd0ba235..32c626d4 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,10 +16,12 @@ along with this program. If not, see . */ +#include "bitboard.h" + #include #include +#include -#include "bitboard.h" #include "misc.h" namespace Stockfish { @@ -27,7 +29,6 @@ namespace Stockfish { uint8_t PopCnt16[1 << 16]; uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; -Bitboard SquareBB[SQUARE_NB]; Bitboard LineBB[SQUARE_NB][SQUARE_NB]; Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; @@ -38,93 +39,86 @@ Magic BishopMagics[SQUARE_NB]; namespace { - Bitboard RookTable[0x19000]; // To store rook attacks - Bitboard BishopTable[0x1480]; // To store bishop attacks +Bitboard RookTable[0x19000]; // To store rook attacks +Bitboard BishopTable[0x1480]; // To store bishop attacks - void init_magics(PieceType pt, Bitboard table[], Magic magics[]); +void init_magics(PieceType pt, Bitboard table[], Magic magics[]); -} - -/// safe_destination() returns the bitboard of target square for the given step -/// from the given square. If the step is off the board, returns empty bitboard. - -inline Bitboard safe_destination(Square s, int step) { +// Returns the bitboard of target square for the given step +// from the given square. If the step is off the board, returns empty bitboard. +Bitboard safe_destination(Square s, int step) { Square to = Square(s + step); return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0); } +} - -/// Bitboards::pretty() returns an ASCII representation of a bitboard suitable -/// to be printed to standard output. Useful for debugging. - +// Returns an ASCII representation of a bitboard suitable +// to be printed to standard output. Useful for debugging. std::string Bitboards::pretty(Bitboard b) { - std::string s = "+---+---+---+---+---+---+---+---+\n"; + std::string s = "+---+---+---+---+---+---+---+---+\n"; - for (Rank r = RANK_8; r >= RANK_1; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - s += b & make_square(f, r) ? "| X " : "| "; + for (Rank r = RANK_8; r >= RANK_1; --r) + { + for (File f = FILE_A; f <= FILE_H; ++f) + s += b & make_square(f, r) ? "| X " : "| "; - s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n"; - } - s += " a b c d e f g h\n"; + s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n"; + } + s += " a b c d e f g h\n"; - return s; + return s; } -/// Bitboards::init() initializes various bitboard tables. It is called at -/// startup and relies on global objects to be already zero-initialized. - +// Initializes various bitboard tables. It is called at +// startup and relies on global objects to be already zero-initialized. void Bitboards::init() { - for (unsigned i = 0; i < (1 << 16); ++i) - PopCnt16[i] = uint8_t(std::bitset<16>(i).count()); + for (unsigned i = 0; i < (1 << 16); ++i) + PopCnt16[i] = uint8_t(std::bitset<16>(i).count()); - for (Square s = SQ_A1; s <= SQ_H8; ++s) - SquareBB[s] = (1ULL << s); + for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) + for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) + SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) - SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); + init_magics(ROOK, RookTable, RookMagics); + init_magics(BISHOP, BishopTable, BishopMagics); - init_magics(ROOK, RookTable, RookMagics); - init_magics(BISHOP, BishopTable, BishopMagics); + for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) + { + PawnAttacks[WHITE][s1] = pawn_attacks_bb(square_bb(s1)); + PawnAttacks[BLACK][s1] = pawn_attacks_bb(square_bb(s1)); - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - { - PawnAttacks[WHITE][s1] = pawn_attacks_bb(square_bb(s1)); - PawnAttacks[BLACK][s1] = pawn_attacks_bb(square_bb(s1)); + for (int step : {-9, -8, -7, -1, 1, 7, 8, 9}) + PseudoAttacks[KING][s1] |= safe_destination(s1, step); - for (int step : {-9, -8, -7, -1, 1, 7, 8, 9} ) - PseudoAttacks[KING][s1] |= safe_destination(s1, step); + for (int step : {-17, -15, -10, -6, 6, 10, 15, 17}) + PseudoAttacks[KNIGHT][s1] |= safe_destination(s1, step); - for (int step : {-17, -15, -10, -6, 6, 10, 15, 17} ) - PseudoAttacks[KNIGHT][s1] |= safe_destination(s1, step); + PseudoAttacks[QUEEN][s1] = PseudoAttacks[BISHOP][s1] = attacks_bb(s1, 0); + PseudoAttacks[QUEEN][s1] |= PseudoAttacks[ROOK][s1] = attacks_bb(s1, 0); - PseudoAttacks[QUEEN][s1] = PseudoAttacks[BISHOP][s1] = attacks_bb(s1, 0); - PseudoAttacks[QUEEN][s1] |= PseudoAttacks[ ROOK][s1] = attacks_bb< ROOK>(s1, 0); - - for (PieceType pt : { BISHOP, ROOK }) - for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) - { - if (PseudoAttacks[pt][s1] & s2) - { - LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2; - BetweenBB[s1][s2] = (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1))); - } - BetweenBB[s1][s2] |= s2; - } - } + for (PieceType pt : {BISHOP, ROOK}) + for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) + { + if (PseudoAttacks[pt][s1] & s2) + { + LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2; + BetweenBB[s1][s2] = + (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1))); + } + BetweenBB[s1][s2] |= s2; + } + } } namespace { - Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { +Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { - Bitboard attacks = 0; - Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; + Bitboard attacks = 0; + Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST}; for (Direction d : (pt == ROOK ? RookDirections : BishopDirections)) @@ -135,22 +129,21 @@ namespace { } return attacks; - } +} - // init_magics() computes all rook and bishop attacks at startup. Magic - // bitboards are used to look up attacks of sliding pieces. As a reference see - // www.chessprogramming.org/Magic_Bitboards. In particular, here we use the so - // called "fancy" approach. - - void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { +// Computes all rook and bishop attacks at startup. Magic +// bitboards are used to look up attacks of sliding pieces. As a reference see +// www.chessprogramming.org/Magic_Bitboards. In particular, here we use the so +// called "fancy" approach. +void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { // Optimal PRNG seeds to pick the correct magics in the shortest time - int seeds[][RANK_NB] = { { 8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020 }, - { 728, 10316, 55013, 32803, 12281, 15100, 16645, 255 } }; + int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020}, + {728, 10316, 55013, 32803, 12281, 15100, 16645, 255}}; Bitboard occupancy[4096], reference[4096], edges, b; - int epoch[4096] = {}, cnt = 0, size = 0; + int epoch[4096] = {}, cnt = 0, size = 0; for (Square s = SQ_A1; s <= SQ_H8; ++s) { @@ -163,8 +156,8 @@ namespace { // the number of 1s of the mask. Hence we deduce the size of the shift to // apply to the 64 or 32 bits word to get the index. Magic& m = magics[s]; - m.mask = sliding_attack(pt, s, 0) & ~edges; - m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask); + m.mask = sliding_attack(pt, s, 0) & ~edges; + m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask); // Set the offset for the attacks table of the square. We have individual // table sizes for each square with "Fancy Magic Bitboards". @@ -173,7 +166,8 @@ namespace { // Use Carry-Rippler trick to enumerate all subsets of masks[s] and // store the corresponding sliding attack bitboard in reference[]. b = size = 0; - do { + do + { occupancy[size] = b; reference[size] = sliding_attack(pt, s, b); @@ -191,9 +185,9 @@ namespace { // Find a magic for square 's' picking up an (almost) random number // until we find the one that passes the verification test. - for (int i = 0; i < size; ) + for (int i = 0; i < size;) { - for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6; ) + for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6;) m.magic = rng.sparse_rand(); // A good magic must map every possible occupancy to an index that @@ -208,7 +202,7 @@ namespace { if (epoch[idx] < cnt) { - epoch[idx] = cnt; + epoch[idx] = cnt; m.attacks[idx] = reference[i]; } else if (m.attacks[idx] != reference[i]) @@ -216,7 +210,7 @@ namespace { } } } - } +} } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/bitboard.h b/src/bitboard.h index 2b6e2a69..cdff4c75 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,28 +19,23 @@ #ifndef BITBOARD_H_INCLUDED #define BITBOARD_H_INCLUDED +#include +#include +#include +#include +#include #include #include "types.h" namespace Stockfish { -namespace Bitbases { - -void init(); -bool probe(Square wksq, Square wpsq, Square bksq, Color us); - -} // namespace Stockfish::Bitbases - namespace Bitboards { -void init(); +void init(); std::string pretty(Bitboard b); -} // namespace Stockfish::Bitboards - -constexpr Bitboard AllSquares = ~Bitboard(0); -constexpr Bitboard DarkSquares = 0xAA55AA55AA55AA55ULL; +} // namespace Stockfish::Bitboards constexpr Bitboard FileABB = 0x0101010101010101ULL; constexpr Bitboard FileBBB = FileABB << 1; @@ -60,392 +55,317 @@ constexpr Bitboard Rank6BB = Rank1BB << (8 * 5); constexpr Bitboard Rank7BB = Rank1BB << (8 * 6); constexpr Bitboard Rank8BB = Rank1BB << (8 * 7); -constexpr Bitboard QueenSide = FileABB | FileBBB | FileCBB | FileDBB; -constexpr Bitboard CenterFiles = FileCBB | FileDBB | FileEBB | FileFBB; -constexpr Bitboard KingSide = FileEBB | FileFBB | FileGBB | FileHBB; -constexpr Bitboard Center = (FileDBB | FileEBB) & (Rank4BB | Rank5BB); - -constexpr Bitboard KingFlank[FILE_NB] = { - QueenSide ^ FileDBB, QueenSide, QueenSide, - CenterFiles, CenterFiles, - KingSide, KingSide, KingSide ^ FileEBB -}; - extern uint8_t PopCnt16[1 << 16]; extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; -extern Bitboard SquareBB[SQUARE_NB]; extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; extern Bitboard LineBB[SQUARE_NB][SQUARE_NB]; extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB]; -/// Magic holds all magic bitboards relevant data for a single square +// Magic holds all magic bitboards relevant data for a single square struct Magic { - Bitboard mask; - Bitboard magic; - Bitboard* attacks; - unsigned shift; + Bitboard mask; + Bitboard magic; + Bitboard* attacks; + unsigned shift; - // Compute the attack's index using the 'magic bitboards' approach - unsigned index(Bitboard occupied) const { + // Compute the attack's index using the 'magic bitboards' approach + unsigned index(Bitboard occupied) const { - if (HasPext) - return unsigned(pext(occupied, mask)); + if (HasPext) + return unsigned(pext(occupied, mask)); - if (Is64Bit) - return unsigned(((occupied & mask) * magic) >> shift); + if (Is64Bit) + return unsigned(((occupied & mask) * magic) >> shift); - unsigned lo = unsigned(occupied) & unsigned(mask); - unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32); - return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift; - } + unsigned lo = unsigned(occupied) & unsigned(mask); + unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32); + return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift; + } }; extern Magic RookMagics[SQUARE_NB]; extern Magic BishopMagics[SQUARE_NB]; -inline Bitboard square_bb(Square s) { - assert(is_ok(s)); - return SquareBB[s]; +constexpr Bitboard square_bb(Square s) { + assert(is_ok(s)); + return (1ULL << s); } -/// Overloads of bitwise operators between a Bitboard and a Square for testing -/// whether a given bit is set in a bitboard, and for setting and clearing bits. +// Overloads of bitwise operators between a Bitboard and a Square for testing +// whether a given bit is set in a bitboard, and for setting and clearing bits. -inline Bitboard operator&( Bitboard b, Square s) { return b & square_bb(s); } -inline Bitboard operator|( Bitboard b, Square s) { return b | square_bb(s); } -inline Bitboard operator^( Bitboard b, Square s) { return b ^ square_bb(s); } +inline Bitboard operator&(Bitboard b, Square s) { return b & square_bb(s); } +inline Bitboard operator|(Bitboard b, Square s) { return b | square_bb(s); } +inline Bitboard operator^(Bitboard b, Square s) { return b ^ square_bb(s); } inline Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); } inline Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); } -inline Bitboard operator&(Square s, Bitboard b) { return b & s; } -inline Bitboard operator|(Square s, Bitboard b) { return b | s; } -inline Bitboard operator^(Square s, Bitboard b) { return b ^ s; } +inline Bitboard operator&(Square s, Bitboard b) { return b & s; } +inline Bitboard operator|(Square s, Bitboard b) { return b | s; } +inline Bitboard operator^(Square s, Bitboard b) { return b ^ s; } -inline Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; } +inline Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; } -constexpr bool more_than_one(Bitboard b) { - return b & (b - 1); -} +constexpr bool more_than_one(Bitboard b) { return b & (b - 1); } -constexpr bool opposite_colors(Square s1, Square s2) { - return (s1 + rank_of(s1) + s2 + rank_of(s2)) & 1; -} +// rank_bb() and file_bb() return a bitboard representing all the squares on +// the given file or rank. + +constexpr Bitboard rank_bb(Rank r) { return Rank1BB << (8 * r); } + +constexpr Bitboard rank_bb(Square s) { return rank_bb(rank_of(s)); } + +constexpr Bitboard file_bb(File f) { return FileABB << f; } + +constexpr Bitboard file_bb(Square s) { return file_bb(file_of(s)); } -/// rank_bb() and file_bb() return a bitboard representing all the squares on -/// the given file or rank. - -constexpr Bitboard rank_bb(Rank r) { - return Rank1BB << (8 * r); -} - -constexpr Bitboard rank_bb(Square s) { - return rank_bb(rank_of(s)); -} - -constexpr Bitboard file_bb(File f) { - return FileABB << f; -} - -constexpr Bitboard file_bb(Square s) { - return file_bb(file_of(s)); -} - - -/// shift() moves a bitboard one or two steps as specified by the direction D - +// Moves a bitboard one or two steps as specified by the direction D template constexpr Bitboard shift(Bitboard b) { - return D == NORTH ? b << 8 : D == SOUTH ? b >> 8 - : D == NORTH+NORTH? b <<16 : D == SOUTH+SOUTH? b >>16 - : D == EAST ? (b & ~FileHBB) << 1 : D == WEST ? (b & ~FileABB) >> 1 - : D == NORTH_EAST ? (b & ~FileHBB) << 9 : D == NORTH_WEST ? (b & ~FileABB) << 7 - : D == SOUTH_EAST ? (b & ~FileHBB) >> 7 : D == SOUTH_WEST ? (b & ~FileABB) >> 9 - : 0; + return D == NORTH ? b << 8 + : D == SOUTH ? b >> 8 + : D == NORTH + NORTH ? b << 16 + : D == SOUTH + SOUTH ? b >> 16 + : D == EAST ? (b & ~FileHBB) << 1 + : D == WEST ? (b & ~FileABB) >> 1 + : D == NORTH_EAST ? (b & ~FileHBB) << 9 + : D == NORTH_WEST ? (b & ~FileABB) << 7 + : D == SOUTH_EAST ? (b & ~FileHBB) >> 7 + : D == SOUTH_WEST ? (b & ~FileABB) >> 9 + : 0; } -/// pawn_attacks_bb() returns the squares attacked by pawns of the given color -/// from the squares in the given bitboard. - +// Returns the squares attacked by pawns of the given color +// from the squares in the given bitboard. template constexpr Bitboard pawn_attacks_bb(Bitboard b) { - return C == WHITE ? shift(b) | shift(b) - : shift(b) | shift(b); + return C == WHITE ? shift(b) | shift(b) + : shift(b) | shift(b); } inline Bitboard pawn_attacks_bb(Color c, Square s) { - assert(is_ok(s)); - return PawnAttacks[c][s]; + assert(is_ok(s)); + return PawnAttacks[c][s]; } - -/// pawn_double_attacks_bb() returns the squares doubly attacked by pawns of the -/// given color from the squares in the given bitboard. - -template -constexpr Bitboard pawn_double_attacks_bb(Bitboard b) { - return C == WHITE ? shift(b) & shift(b) - : shift(b) & shift(b); -} - - -/// adjacent_files_bb() returns a bitboard representing all the squares on the -/// adjacent files of a given square. - -constexpr Bitboard adjacent_files_bb(Square s) { - return shift(file_bb(s)) | shift(file_bb(s)); -} - - -/// line_bb() returns a bitboard representing an entire line (from board edge -/// to board edge) that intersects the two given squares. If the given squares -/// are not on a same file/rank/diagonal, the function returns 0. For instance, -/// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal. - +// Returns a bitboard representing an entire line (from board edge +// to board edge) that intersects the two given squares. If the given squares +// are not on a same file/rank/diagonal, the function returns 0. For instance, +// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal. inline Bitboard line_bb(Square s1, Square s2) { - assert(is_ok(s1) && is_ok(s2)); - - return LineBB[s1][s2]; + assert(is_ok(s1) && is_ok(s2)); + return LineBB[s1][s2]; } -/// between_bb(s1, s2) returns a bitboard representing the squares in the semi-open -/// segment between the squares s1 and s2 (excluding s1 but including s2). If the -/// given squares are not on a same file/rank/diagonal, it returns s2. For instance, -/// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but -/// between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick -/// allows to generate non-king evasion moves faster: the defending piece must either -/// interpose itself to cover the check or capture the checking piece. - +// Returns a bitboard representing the squares in the semi-open +// segment between the squares s1 and s2 (excluding s1 but including s2). If the +// given squares are not on a same file/rank/diagonal, it returns s2. For instance, +// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but +// between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick +// allows to generate non-king evasion moves faster: the defending piece must either +// interpose itself to cover the check or capture the checking piece. inline Bitboard between_bb(Square s1, Square s2) { - assert(is_ok(s1) && is_ok(s2)); - - return BetweenBB[s1][s2]; + assert(is_ok(s1) && is_ok(s2)); + return BetweenBB[s1][s2]; } +// Returns true if the squares s1, s2 and s3 are aligned either on a +// straight or on a diagonal line. +inline bool aligned(Square s1, Square s2, Square s3) { return line_bb(s1, s2) & s3; } -/// forward_ranks_bb() returns a bitboard representing the squares on the ranks in -/// front of the given one, from the point of view of the given color. For instance, -/// forward_ranks_bb(BLACK, SQ_D3) will return the 16 squares on ranks 1 and 2. -constexpr Bitboard forward_ranks_bb(Color c, Square s) { - return c == WHITE ? ~Rank1BB << 8 * relative_rank(WHITE, s) - : ~Rank8BB >> 8 * relative_rank(BLACK, s); +// distance() functions return the distance between x and y, defined as the +// number of steps for a king in x to reach y. + +template +inline int distance(Square x, Square y); + +template<> +inline int distance(Square x, Square y) { + return std::abs(file_of(x) - file_of(y)); } - -/// forward_file_bb() returns a bitboard representing all the squares along the -/// line in front of the given one, from the point of view of the given color. - -constexpr Bitboard forward_file_bb(Color c, Square s) { - return forward_ranks_bb(c, s) & file_bb(s); +template<> +inline int distance(Square x, Square y) { + return std::abs(rank_of(x) - rank_of(y)); } - -/// pawn_attack_span() returns a bitboard representing all the squares that can -/// be attacked by a pawn of the given color when it moves along its file, starting -/// from the given square. - -constexpr Bitboard pawn_attack_span(Color c, Square s) { - return forward_ranks_bb(c, s) & adjacent_files_bb(s); +template<> +inline int distance(Square x, Square y) { + return SquareDistance[x][y]; } - -/// passed_pawn_span() returns a bitboard which can be used to test if a pawn of -/// the given color and on the given square is a passed pawn. - -constexpr Bitboard passed_pawn_span(Color c, Square s) { - return pawn_attack_span(c, s) | forward_file_bb(c, s); -} - - -/// aligned() returns true if the squares s1, s2 and s3 are aligned either on a -/// straight or on a diagonal line. - -inline bool aligned(Square s1, Square s2, Square s3) { - return line_bb(s1, s2) & s3; -} - - -/// distance() functions return the distance between x and y, defined as the -/// number of steps for a king in x to reach y. - -template inline int distance(Square x, Square y); -template<> inline int distance(Square x, Square y) { return std::abs(file_of(x) - file_of(y)); } -template<> inline int distance(Square x, Square y) { return std::abs(rank_of(x) - rank_of(y)); } -template<> inline int distance(Square x, Square y) { return SquareDistance[x][y]; } - inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); } -inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); } - - -/// attacks_bb(Square) returns the pseudo attacks of the give piece type -/// assuming an empty board. +// Returns the pseudo attacks of the given piece type +// assuming an empty board. template inline Bitboard attacks_bb(Square s) { - assert((Pt != PAWN) && (is_ok(s))); - - return PseudoAttacks[Pt][s]; + assert((Pt != PAWN) && (is_ok(s))); + return PseudoAttacks[Pt][s]; } -/// attacks_bb(Square, Bitboard) returns the attacks by the given piece -/// assuming the board is occupied according to the passed Bitboard. -/// Sliding piece attacks do not continue passed an occupied square. - +// Returns the attacks by the given piece +// assuming the board is occupied according to the passed Bitboard. +// Sliding piece attacks do not continue passed an occupied square. template inline Bitboard attacks_bb(Square s, Bitboard occupied) { - assert((Pt != PAWN) && (is_ok(s))); + assert((Pt != PAWN) && (is_ok(s))); - switch (Pt) - { - case BISHOP: return BishopMagics[s].attacks[BishopMagics[s].index(occupied)]; - case ROOK : return RookMagics[s].attacks[ RookMagics[s].index(occupied)]; - case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); - default : return PseudoAttacks[Pt][s]; - } + switch (Pt) + { + case BISHOP : + return BishopMagics[s].attacks[BishopMagics[s].index(occupied)]; + case ROOK : + return RookMagics[s].attacks[RookMagics[s].index(occupied)]; + case QUEEN : + return attacks_bb(s, occupied) | attacks_bb(s, occupied); + default : + return PseudoAttacks[Pt][s]; + } } +// Returns the attacks by the given piece +// assuming the board is occupied according to the passed Bitboard. +// Sliding piece attacks do not continue passed an occupied square. inline Bitboard attacks_bb(PieceType pt, Square s, Bitboard occupied) { - assert((pt != PAWN) && (is_ok(s))); + assert((pt != PAWN) && (is_ok(s))); - switch (pt) - { - case BISHOP: return attacks_bb(s, occupied); - case ROOK : return attacks_bb< ROOK>(s, occupied); - case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); - default : return PseudoAttacks[pt][s]; - } + switch (pt) + { + case BISHOP : + return attacks_bb(s, occupied); + case ROOK : + return attacks_bb(s, occupied); + case QUEEN : + return attacks_bb(s, occupied) | attacks_bb(s, occupied); + default : + return PseudoAttacks[pt][s]; + } } -/// popcount() counts the number of non-zero bits in a bitboard - +// Counts the number of non-zero bits in a bitboard. inline int popcount(Bitboard b) { #ifndef USE_POPCNT - union { Bitboard bb; uint16_t u[4]; } v = { b }; - return PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]]; + union { + Bitboard bb; + uint16_t u[4]; + } v = {b}; + return PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]]; -#elif defined(_MSC_VER) || defined(__INTEL_COMPILER) +#elif defined(_MSC_VER) - return (int)_mm_popcnt_u64(b); + return int(_mm_popcnt_u64(b)); -#else // Assumed gcc or compatible compiler +#else // Assumed gcc or compatible compiler - return __builtin_popcountll(b); + return __builtin_popcountll(b); #endif } - -/// lsb() and msb() return the least/most significant bit in a non-zero bitboard - -#if defined(__GNUC__) // GCC, Clang, ICC - +// Returns the least significant bit in a non-zero bitboard. inline Square lsb(Bitboard b) { - assert(b); - return Square(__builtin_ctzll(b)); -} + assert(b); -inline Square msb(Bitboard b) { - assert(b); - return Square(63 ^ __builtin_clzll(b)); -} +#if defined(__GNUC__) // GCC, Clang, ICX -#elif defined(_MSC_VER) // MSVC + return Square(__builtin_ctzll(b)); -#ifdef _WIN64 // MSVC, WIN64 +#elif defined(_MSC_VER) + #ifdef _WIN64 // MSVC, WIN64 -inline Square lsb(Bitboard b) { - assert(b); - unsigned long idx; - _BitScanForward64(&idx, b); - return (Square) idx; -} + unsigned long idx; + _BitScanForward64(&idx, b); + return Square(idx); -inline Square msb(Bitboard b) { - assert(b); - unsigned long idx; - _BitScanReverse64(&idx, b); - return (Square) idx; -} - -#else // MSVC, WIN32 - -inline Square lsb(Bitboard b) { - assert(b); - unsigned long idx; - - if (b & 0xffffffff) { - _BitScanForward(&idx, int32_t(b)); - return Square(idx); - } else { - _BitScanForward(&idx, int32_t(b >> 32)); - return Square(idx + 32); - } -} - -inline Square msb(Bitboard b) { - assert(b); - unsigned long idx; - - if (b >> 32) { - _BitScanReverse(&idx, int32_t(b >> 32)); - return Square(idx + 32); - } else { - _BitScanReverse(&idx, int32_t(b)); - return Square(idx); - } -} - -#endif + #else // MSVC, WIN32 + unsigned long idx; + if (b & 0xffffffff) + { + _BitScanForward(&idx, int32_t(b)); + return Square(idx); + } + else + { + _BitScanForward(&idx, int32_t(b >> 32)); + return Square(idx + 32); + } + #endif #else // Compiler is neither GCC nor MSVC compatible - -#error "Compiler not supported." - + #error "Compiler not supported." #endif +} -/// least_significant_square_bb() returns the bitboard of the least significant -/// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)). +// Returns the most significant bit in a non-zero bitboard. +inline Square msb(Bitboard b) { + assert(b); +#if defined(__GNUC__) // GCC, Clang, ICX + + return Square(63 ^ __builtin_clzll(b)); + +#elif defined(_MSC_VER) + #ifdef _WIN64 // MSVC, WIN64 + + unsigned long idx; + _BitScanReverse64(&idx, b); + return Square(idx); + + #else // MSVC, WIN32 + + unsigned long idx; + + if (b >> 32) + { + _BitScanReverse(&idx, int32_t(b >> 32)); + return Square(idx + 32); + } + else + { + _BitScanReverse(&idx, int32_t(b)); + return Square(idx); + } + #endif +#else // Compiler is neither GCC nor MSVC compatible + #error "Compiler not supported." +#endif +} + +// Returns the bitboard of the least significant +// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)). inline Bitboard least_significant_square_bb(Bitboard b) { - assert(b); - return b & -b; + assert(b); + return b & -b; } -/// pop_lsb() finds and clears the least significant bit in a non-zero bitboard - +// Finds and clears the least significant bit in a non-zero bitboard. inline Square pop_lsb(Bitboard& b) { - assert(b); - const Square s = lsb(b); - b &= b - 1; - return s; + assert(b); + const Square s = lsb(b); + b &= b - 1; + return s; } +} // namespace Stockfish -/// frontmost_sq() returns the most advanced square for the given color, -/// requires a non-zero bitboard. -inline Square frontmost_sq(Color c, Bitboard b) { - assert(b); - return c == WHITE ? msb(b) : lsb(b); -} - -} // namespace Stockfish - -#endif // #ifndef BITBOARD_H_INCLUDED +#endif // #ifndef BITBOARD_H_INCLUDED diff --git a/src/endgame.cpp b/src/endgame.cpp deleted file mode 100644 index e773e7a9..00000000 --- a/src/endgame.cpp +++ /dev/null @@ -1,747 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include "bitboard.h" -#include "endgame.h" -#include "movegen.h" - -namespace Stockfish { - -namespace { - - // Used to drive the king towards the edge of the board - // in KX vs K and KQ vs KR endgames. - // Values range from 27 (center squares) to 90 (in the corners) - inline int push_to_edge(Square s) { - int rd = edge_distance(rank_of(s)), fd = edge_distance(file_of(s)); - return 90 - (7 * fd * fd / 2 + 7 * rd * rd / 2); - } - - // Used to drive the king towards A1H8 corners in KBN vs K endgames. - // Values range from 0 on A8H1 diagonal to 7 in A1H8 corners - inline int push_to_corner(Square s) { - return abs(7 - rank_of(s) - file_of(s)); - } - - // Drive a piece close to or away from another piece - inline int push_close(Square s1, Square s2) { return 140 - 20 * distance(s1, s2); } - inline int push_away(Square s1, Square s2) { return 120 - push_close(s1, s2); } - -#ifndef NDEBUG - bool verify_material(const Position& pos, Color c, Value npm, int pawnsCnt) { - return pos.non_pawn_material(c) == npm && pos.count(c) == pawnsCnt; - } -#endif - - // Map the square as if strongSide is white and strongSide's only pawn - // is on the left half of the board. - Square normalize(const Position& pos, Color strongSide, Square sq) { - - assert(pos.count(strongSide) == 1); - - if (file_of(pos.square(strongSide)) >= FILE_E) - sq = flip_file(sq); - - return strongSide == WHITE ? sq : flip_rank(sq); - } - -} // namespace - - -namespace Endgames { - - std::pair, Map> maps; - - void init() { - - add("KPK"); - add("KNNK"); - add("KBNK"); - add("KRKP"); - add("KRKB"); - add("KRKN"); - add("KQKP"); - add("KQKR"); - add("KNNKP"); - - add("KRPKR"); - add("KRPKB"); - add("KBPKB"); - add("KBPKN"); - add("KBPPKB"); - add("KRPPKRP"); - } -} - - -/// Mate with KX vs K. This function is used to evaluate positions with -/// king and plenty of material vs a lone king. It simply gives the -/// attacking side a bonus for driving the defending king towards the edge -/// of the board, and for keeping the distance between the two kings small. -template<> -Value Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, weakSide, VALUE_ZERO, 0)); - assert(!pos.checkers()); // Eval is never called when in check - - // Stalemate detection with lone king - if (pos.side_to_move() == weakSide && !MoveList(pos).size()) - return VALUE_DRAW; - - Square strongKing = pos.square(strongSide); - Square weakKing = pos.square(weakSide); - - Value result = pos.non_pawn_material(strongSide) - + pos.count(strongSide) * PawnValueEg - + push_to_edge(weakKing) - + push_close(strongKing, weakKing); - - if ( pos.count(strongSide) - || pos.count(strongSide) - ||(pos.count(strongSide) && pos.count(strongSide)) - || ( (pos.pieces(strongSide, BISHOP) & ~DarkSquares) - && (pos.pieces(strongSide, BISHOP) & DarkSquares))) - result = std::min(result + VALUE_KNOWN_WIN, VALUE_TB_WIN_IN_MAX_PLY - 1); - - return strongSide == pos.side_to_move() ? result : -result; -} - - -/// Mate with KBN vs K. This is similar to KX vs K, but we have to drive the -/// defending king towards a corner square that our bishop attacks. -template<> -Value Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, KnightValueMg + BishopValueMg, 0)); - assert(verify_material(pos, weakSide, VALUE_ZERO, 0)); - - Square strongKing = pos.square(strongSide); - Square strongBishop = pos.square(strongSide); - Square weakKing = pos.square(weakSide); - - // If our bishop does not attack A1/H8, we flip the enemy king square - // to drive to opposite corners (A8/H1). - - Value result = (VALUE_KNOWN_WIN + 3520) - + push_close(strongKing, weakKing) - + 420 * push_to_corner(opposite_colors(strongBishop, SQ_A1) ? flip_file(weakKing) : weakKing); - - assert(abs(result) < VALUE_TB_WIN_IN_MAX_PLY); - return strongSide == pos.side_to_move() ? result : -result; -} - - -/// KP vs K. This endgame is evaluated with the help of a bitbase -template<> -Value Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, VALUE_ZERO, 1)); - assert(verify_material(pos, weakSide, VALUE_ZERO, 0)); - - // Assume strongSide is white and the pawn is on files A-D - Square strongKing = normalize(pos, strongSide, pos.square(strongSide)); - Square strongPawn = normalize(pos, strongSide, pos.square(strongSide)); - Square weakKing = normalize(pos, strongSide, pos.square(weakSide)); - - Color us = strongSide == pos.side_to_move() ? WHITE : BLACK; - - if (!Bitbases::probe(strongKing, strongPawn, weakKing, us)) - return VALUE_DRAW; - - Value result = VALUE_KNOWN_WIN + PawnValueEg + Value(rank_of(strongPawn)); - - return strongSide == pos.side_to_move() ? result : -result; -} - - -/// KR vs KP. This is a somewhat tricky endgame to evaluate precisely without -/// a bitbase. The function below returns drawish scores when the pawn is -/// far advanced with support of the king, while the attacking king is far -/// away. -template<> -Value Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, RookValueMg, 0)); - assert(verify_material(pos, weakSide, VALUE_ZERO, 1)); - - Square strongKing = pos.square(strongSide); - Square weakKing = pos.square(weakSide); - Square strongRook = pos.square(strongSide); - Square weakPawn = pos.square(weakSide); - Square queeningSquare = make_square(file_of(weakPawn), relative_rank(weakSide, RANK_8)); - Value result; - - // If the stronger side's king is in front of the pawn, it's a win - if (forward_file_bb(strongSide, strongKing) & weakPawn) - result = RookValueEg - distance(strongKing, weakPawn); - - // If the weaker side's king is too far from the pawn and the rook, - // it's a win. - else if ( distance(weakKing, weakPawn) >= 3 + (pos.side_to_move() == weakSide) - && distance(weakKing, strongRook) >= 3) - result = RookValueEg - distance(strongKing, weakPawn); - - // If the pawn is far advanced and supported by the defending king, - // the position is drawish - else if ( relative_rank(strongSide, weakKing) <= RANK_3 - && distance(weakKing, weakPawn) == 1 - && relative_rank(strongSide, strongKing) >= RANK_4 - && distance(strongKing, weakPawn) > 2 + (pos.side_to_move() == strongSide)) - result = Value(80) - 8 * distance(strongKing, weakPawn); - - else - result = Value(200) - 8 * ( distance(strongKing, weakPawn + pawn_push(weakSide)) - - distance(weakKing, weakPawn + pawn_push(weakSide)) - - distance(weakPawn, queeningSquare)); - - return strongSide == pos.side_to_move() ? result : -result; -} - - -/// KR vs KB. This is very simple, and always returns drawish scores. The -/// score is slightly bigger when the defending king is close to the edge. -template<> -Value Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, RookValueMg, 0)); - assert(verify_material(pos, weakSide, BishopValueMg, 0)); - - Value result = Value(push_to_edge(pos.square(weakSide))); - return strongSide == pos.side_to_move() ? result : -result; -} - - -/// KR vs KN. The attacking side has slightly better winning chances than -/// in KR vs KB, particularly if the king and the knight are far apart. -template<> -Value Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, RookValueMg, 0)); - assert(verify_material(pos, weakSide, KnightValueMg, 0)); - - Square weakKing = pos.square(weakSide); - Square weakKnight = pos.square(weakSide); - Value result = Value(push_to_edge(weakKing) + push_away(weakKing, weakKnight)); - return strongSide == pos.side_to_move() ? result : -result; -} - - -/// KQ vs KP. In general, this is a win for the stronger side, but there are a -/// few important exceptions. A pawn on 7th rank and on the A,C,F or H files -/// with a king positioned next to it can be a draw, so in that case, we only -/// use the distance between the kings. -template<> -Value Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, QueenValueMg, 0)); - assert(verify_material(pos, weakSide, VALUE_ZERO, 1)); - - Square strongKing = pos.square(strongSide); - Square weakKing = pos.square(weakSide); - Square weakPawn = pos.square(weakSide); - - Value result = Value(push_close(strongKing, weakKing)); - - if ( relative_rank(weakSide, weakPawn) != RANK_7 - || distance(weakKing, weakPawn) != 1 - || ((FileBBB | FileDBB | FileEBB | FileGBB) & weakPawn)) - result += QueenValueEg - PawnValueEg; - - return strongSide == pos.side_to_move() ? result : -result; -} - - -/// KQ vs KR. This is almost identical to KX vs K: we give the attacking -/// king a bonus for having the kings close together, and for forcing the -/// defending king towards the edge. If we also take care to avoid null move for -/// the defending side in the search, this is usually sufficient to win KQ vs KR. -template<> -Value Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, QueenValueMg, 0)); - assert(verify_material(pos, weakSide, RookValueMg, 0)); - - Square strongKing = pos.square(strongSide); - Square weakKing = pos.square(weakSide); - - Value result = QueenValueEg - - RookValueEg - + push_to_edge(weakKing) - + push_close(strongKing, weakKing); - - return strongSide == pos.side_to_move() ? result : -result; -} - - -/// KNN vs KP. Very drawish, but there are some mate opportunities if we can -/// press the weakSide King to a corner before the pawn advances too much. -template<> -Value Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, 2 * KnightValueMg, 0)); - assert(verify_material(pos, weakSide, VALUE_ZERO, 1)); - - Square weakKing = pos.square(weakSide); - Square weakPawn = pos.square(weakSide); - - Value result = PawnValueEg - + 2 * push_to_edge(weakKing) - - 10 * relative_rank(weakSide, weakPawn); - - return strongSide == pos.side_to_move() ? result : -result; -} - - -/// Some cases of trivial draws -template<> Value Endgame::operator()(const Position&) const { return VALUE_DRAW; } - - -/// KB and one or more pawns vs K. It checks for draws with rook pawns and -/// a bishop of the wrong color. If such a draw is detected, SCALE_FACTOR_DRAW -/// is returned. If not, the return value is SCALE_FACTOR_NONE, i.e. no scaling -/// will be used. -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(pos.non_pawn_material(strongSide) == BishopValueMg); - assert(pos.count(strongSide) >= 1); - - // No assertions about the material of weakSide, because we want draws to - // be detected even when the weaker side has some pawns. - - Bitboard strongPawns = pos.pieces(strongSide, PAWN); - Bitboard allPawns = pos.pieces(PAWN); - - Square strongBishop = pos.square(strongSide); - Square weakKing = pos.square(weakSide); - Square strongKing = pos.square(strongSide); - - // All strongSide pawns are on a single rook file? - if (!(strongPawns & ~FileABB) || !(strongPawns & ~FileHBB)) - { - Square queeningSquare = relative_square(strongSide, make_square(file_of(lsb(strongPawns)), RANK_8)); - - if ( opposite_colors(queeningSquare, strongBishop) - && distance(queeningSquare, weakKing) <= 1) - return SCALE_FACTOR_DRAW; - } - - // If all the pawns are on the same B or G file, then it's potentially a draw - if ((!(allPawns & ~FileBBB) || !(allPawns & ~FileGBB)) - && pos.non_pawn_material(weakSide) == 0 - && pos.count(weakSide) >= 1) - { - // Get the least advanced weakSide pawn - Square weakPawn = frontmost_sq(strongSide, pos.pieces(weakSide, PAWN)); - - // There's potential for a draw if our pawn is blocked on the 7th rank, - // the bishop cannot attack it or they only have one pawn left. - if ( relative_rank(strongSide, weakPawn) == RANK_7 - && (strongPawns & (weakPawn + pawn_push(weakSide))) - && (opposite_colors(strongBishop, weakPawn) || !more_than_one(strongPawns))) - { - int strongKingDist = distance(weakPawn, strongKing); - int weakKingDist = distance(weakPawn, weakKing); - - // It's a draw if the weak king is on its back two ranks, within 2 - // squares of the blocking pawn and the strong king is not - // closer. (I think this rule only fails in practically - // unreachable positions such as 5k1K/6p1/6P1/8/8/3B4/8/8 w - // and positions where qsearch will immediately correct the - // problem such as 8/4k1p1/6P1/1K6/3B4/8/8/8 w). - if ( relative_rank(strongSide, weakKing) >= RANK_7 - && weakKingDist <= 2 - && weakKingDist <= strongKingDist) - return SCALE_FACTOR_DRAW; - } - } - - return SCALE_FACTOR_NONE; -} - - -/// KQ vs KR and one or more pawns. It tests for fortress draws with a rook on -/// the third rank defended by a pawn. -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, QueenValueMg, 0)); - assert(pos.count(weakSide) == 1); - assert(pos.count(weakSide) >= 1); - - Square strongKing = pos.square(strongSide); - Square weakKing = pos.square(weakSide); - Square weakRook = pos.square(weakSide); - - if ( relative_rank(weakSide, weakKing) <= RANK_2 - && relative_rank(weakSide, strongKing) >= RANK_4 - && relative_rank(weakSide, weakRook) == RANK_3 - && ( pos.pieces(weakSide, PAWN) - & attacks_bb(weakKing) - & pawn_attacks_bb(strongSide, weakRook))) - return SCALE_FACTOR_DRAW; - - return SCALE_FACTOR_NONE; -} - - -/// KRP vs KR. This function knows a handful of the most important classes of -/// drawn positions, but is far from perfect. It would probably be a good idea -/// to add more knowledge in the future. -/// -/// It would also be nice to rewrite the actual code for this function, -/// which is mostly copied from Glaurung 1.x, and isn't very pretty. -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, RookValueMg, 1)); - assert(verify_material(pos, weakSide, RookValueMg, 0)); - - // Assume strongSide is white and the pawn is on files A-D - Square strongKing = normalize(pos, strongSide, pos.square(strongSide)); - Square strongRook = normalize(pos, strongSide, pos.square(strongSide)); - Square strongPawn = normalize(pos, strongSide, pos.square(strongSide)); - Square weakKing = normalize(pos, strongSide, pos.square(weakSide)); - Square weakRook = normalize(pos, strongSide, pos.square(weakSide)); - - File pawnFile = file_of(strongPawn); - Rank pawnRank = rank_of(strongPawn); - Square queeningSquare = make_square(pawnFile, RANK_8); - int tempo = (pos.side_to_move() == strongSide); - - // If the pawn is not too far advanced and the defending king defends the - // queening square, use the third-rank defence. - if ( pawnRank <= RANK_5 - && distance(weakKing, queeningSquare) <= 1 - && strongKing <= SQ_H5 - && (rank_of(weakRook) == RANK_6 || (pawnRank <= RANK_3 && rank_of(strongRook) != RANK_6))) - return SCALE_FACTOR_DRAW; - - // The defending side saves a draw by checking from behind in case the pawn - // has advanced to the 6th rank with the king behind. - if ( pawnRank == RANK_6 - && distance(weakKing, queeningSquare) <= 1 - && rank_of(strongKing) + tempo <= RANK_6 - && (rank_of(weakRook) == RANK_1 || (!tempo && distance(weakRook, strongPawn) >= 3))) - return SCALE_FACTOR_DRAW; - - if ( pawnRank >= RANK_6 - && weakKing == queeningSquare - && rank_of(weakRook) == RANK_1 - && (!tempo || distance(strongKing, strongPawn) >= 2)) - return SCALE_FACTOR_DRAW; - - // White pawn on a7 and rook on a8 is a draw if black's king is on g7 or h7 - // and the black rook is behind the pawn. - if ( strongPawn == SQ_A7 - && strongRook == SQ_A8 - && (weakKing == SQ_H7 || weakKing == SQ_G7) - && file_of(weakRook) == FILE_A - && (rank_of(weakRook) <= RANK_3 || file_of(strongKing) >= FILE_D || rank_of(strongKing) <= RANK_5)) - return SCALE_FACTOR_DRAW; - - // If the defending king blocks the pawn and the attacking king is too far - // away, it's a draw. - if ( pawnRank <= RANK_5 - && weakKing == strongPawn + NORTH - && distance(strongKing, strongPawn) - tempo >= 2 - && distance(strongKing, weakRook) - tempo >= 2) - return SCALE_FACTOR_DRAW; - - // Pawn on the 7th rank supported by the rook from behind usually wins if the - // attacking king is closer to the queening square than the defending king, - // and the defending king cannot gain tempi by threatening the attacking rook. - if ( pawnRank == RANK_7 - && pawnFile != FILE_A - && file_of(strongRook) == pawnFile - && strongRook != queeningSquare - && (distance(strongKing, queeningSquare) < distance(weakKing, queeningSquare) - 2 + tempo) - && (distance(strongKing, queeningSquare) < distance(weakKing, strongRook) + tempo)) - return ScaleFactor(SCALE_FACTOR_MAX - 2 * distance(strongKing, queeningSquare)); - - // Similar to the above, but with the pawn further back - if ( pawnFile != FILE_A - && file_of(strongRook) == pawnFile - && strongRook < strongPawn - && (distance(strongKing, queeningSquare) < distance(weakKing, queeningSquare) - 2 + tempo) - && (distance(strongKing, strongPawn + NORTH) < distance(weakKing, strongPawn + NORTH) - 2 + tempo) - && ( distance(weakKing, strongRook) + tempo >= 3 - || ( distance(strongKing, queeningSquare) < distance(weakKing, strongRook) + tempo - && (distance(strongKing, strongPawn + NORTH) < distance(weakKing, strongPawn) + tempo)))) - return ScaleFactor( SCALE_FACTOR_MAX - - 8 * distance(strongPawn, queeningSquare) - - 2 * distance(strongKing, queeningSquare)); - - // If the pawn is not far advanced and the defending king is somewhere in - // the pawn's path, it's probably a draw. - if (pawnRank <= RANK_4 && weakKing > strongPawn) - { - if (file_of(weakKing) == file_of(strongPawn)) - return ScaleFactor(10); - if ( distance(weakKing, strongPawn) == 1 - && distance(strongKing, weakKing) > 2) - return ScaleFactor(24 - 2 * distance(strongKing, weakKing)); - } - return SCALE_FACTOR_NONE; -} - -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, RookValueMg, 1)); - assert(verify_material(pos, weakSide, BishopValueMg, 0)); - - // Test for a rook pawn - if (pos.pieces(PAWN) & (FileABB | FileHBB)) - { - Square weakKing = pos.square(weakSide); - Square weakBishop = pos.square(weakSide); - Square strongKing = pos.square(strongSide); - Square strongPawn = pos.square(strongSide); - Rank pawnRank = relative_rank(strongSide, strongPawn); - Direction push = pawn_push(strongSide); - - // If the pawn is on the 5th rank and the pawn (currently) is on - // the same color square as the bishop then there is a chance of - // a fortress. Depending on the king position give a moderate - // reduction or a stronger one if the defending king is near the - // corner but not trapped there. - if (pawnRank == RANK_5 && !opposite_colors(weakBishop, strongPawn)) - { - int d = distance(strongPawn + 3 * push, weakKing); - - if (d <= 2 && !(d == 0 && weakKing == strongKing + 2 * push)) - return ScaleFactor(24); - else - return ScaleFactor(48); - } - - // When the pawn has moved to the 6th rank we can be fairly sure - // it's drawn if the bishop attacks the square in front of the - // pawn from a reasonable distance and the defending king is near - // the corner - if ( pawnRank == RANK_6 - && distance(strongPawn + 2 * push, weakKing) <= 1 - && (attacks_bb(weakBishop) & (strongPawn + push)) - && distance(weakBishop, strongPawn) >= 2) - return ScaleFactor(8); - } - - return SCALE_FACTOR_NONE; -} - -/// KRPP vs KRP. There is just a single rule: if the stronger side has no passed -/// pawns and the defending king is actively placed, the position is drawish. -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, RookValueMg, 2)); - assert(verify_material(pos, weakSide, RookValueMg, 1)); - - Square strongPawn1 = lsb(pos.pieces(strongSide, PAWN)); - Square strongPawn2 = msb(pos.pieces(strongSide, PAWN)); - Square weakKing = pos.square(weakSide); - - // Does the stronger side have a passed pawn? - if (pos.pawn_passed(strongSide, strongPawn1) || pos.pawn_passed(strongSide, strongPawn2)) - return SCALE_FACTOR_NONE; - - Rank pawnRank = std::max(relative_rank(strongSide, strongPawn1), relative_rank(strongSide, strongPawn2)); - - if ( distance(weakKing, strongPawn1) <= 1 - && distance(weakKing, strongPawn2) <= 1 - && relative_rank(strongSide, weakKing) > pawnRank) - { - assert(pawnRank > RANK_1 && pawnRank < RANK_7); - return ScaleFactor(7 * pawnRank); - } - return SCALE_FACTOR_NONE; -} - - -/// K and two or more pawns vs K. There is just a single rule here: if all pawns -/// are on the same rook file and are blocked by the defending king, it's a draw. -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(pos.non_pawn_material(strongSide) == VALUE_ZERO); - assert(pos.count(strongSide) >= 2); - assert(verify_material(pos, weakSide, VALUE_ZERO, 0)); - - Square weakKing = pos.square(weakSide); - Bitboard strongPawns = pos.pieces(strongSide, PAWN); - - // If all pawns are ahead of the king on a single rook file, it's a draw. - if ( !(strongPawns & ~(FileABB | FileHBB)) - && !(strongPawns & ~passed_pawn_span(weakSide, weakKing))) - return SCALE_FACTOR_DRAW; - - return SCALE_FACTOR_NONE; -} - - -/// KBP vs KB. There are two rules: if the defending king is somewhere along the -/// path of the pawn, and the square of the king is not of the same color as the -/// stronger side's bishop, it's a draw. If the two bishops have opposite color, -/// it's almost always a draw. -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, BishopValueMg, 1)); - assert(verify_material(pos, weakSide, BishopValueMg, 0)); - - Square strongPawn = pos.square(strongSide); - Square strongBishop = pos.square(strongSide); - Square weakBishop = pos.square(weakSide); - Square weakKing = pos.square(weakSide); - - // Case 1: Defending king blocks the pawn, and cannot be driven away - if ( (forward_file_bb(strongSide, strongPawn) & weakKing) - && ( opposite_colors(weakKing, strongBishop) - || relative_rank(strongSide, weakKing) <= RANK_6)) - return SCALE_FACTOR_DRAW; - - // Case 2: Opposite colored bishops - if (opposite_colors(strongBishop, weakBishop)) - return SCALE_FACTOR_DRAW; - - return SCALE_FACTOR_NONE; -} - - -/// KBPP vs KB. It detects a few basic draws with opposite-colored bishops -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, BishopValueMg, 2)); - assert(verify_material(pos, weakSide, BishopValueMg, 0)); - - Square strongBishop = pos.square(strongSide); - Square weakBishop = pos.square(weakSide); - - if (!opposite_colors(strongBishop, weakBishop)) - return SCALE_FACTOR_NONE; - - Square weakKing = pos.square(weakSide); - Square strongPawn1 = lsb(pos.pieces(strongSide, PAWN)); - Square strongPawn2 = msb(pos.pieces(strongSide, PAWN)); - Square blockSq1, blockSq2; - - if (relative_rank(strongSide, strongPawn1) > relative_rank(strongSide, strongPawn2)) - { - blockSq1 = strongPawn1 + pawn_push(strongSide); - blockSq2 = make_square(file_of(strongPawn2), rank_of(strongPawn1)); - } - else - { - blockSq1 = strongPawn2 + pawn_push(strongSide); - blockSq2 = make_square(file_of(strongPawn1), rank_of(strongPawn2)); - } - - switch (distance(strongPawn1, strongPawn2)) - { - case 0: - // Both pawns are on the same file. It's an easy draw if the defender firmly - // controls some square in the frontmost pawn's path. - if ( file_of(weakKing) == file_of(blockSq1) - && relative_rank(strongSide, weakKing) >= relative_rank(strongSide, blockSq1) - && opposite_colors(weakKing, strongBishop)) - return SCALE_FACTOR_DRAW; - else - return SCALE_FACTOR_NONE; - - case 1: - // Pawns on adjacent files. It's a draw if the defender firmly controls the - // square in front of the frontmost pawn's path, and the square diagonally - // behind this square on the file of the other pawn. - if ( weakKing == blockSq1 - && opposite_colors(weakKing, strongBishop) - && ( weakBishop == blockSq2 - || (attacks_bb(blockSq2, pos.pieces()) & pos.pieces(weakSide, BISHOP)) - || distance(strongPawn1, strongPawn2) >= 2)) - return SCALE_FACTOR_DRAW; - - else if ( weakKing == blockSq2 - && opposite_colors(weakKing, strongBishop) - && ( weakBishop == blockSq1 - || (attacks_bb(blockSq1, pos.pieces()) & pos.pieces(weakSide, BISHOP)))) - return SCALE_FACTOR_DRAW; - else - return SCALE_FACTOR_NONE; - - default: - // The pawns are not on the same file or adjacent files. No scaling. - return SCALE_FACTOR_NONE; - } -} - - -/// KBP vs KN. There is a single rule: if the defending king is somewhere along -/// the path of the pawn, and the square of the king is not of the same color as -/// the stronger side's bishop, it's a draw. -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, BishopValueMg, 1)); - assert(verify_material(pos, weakSide, KnightValueMg, 0)); - - Square strongPawn = pos.square(strongSide); - Square strongBishop = pos.square(strongSide); - Square weakKing = pos.square(weakSide); - - if ( file_of(weakKing) == file_of(strongPawn) - && relative_rank(strongSide, strongPawn) < relative_rank(strongSide, weakKing) - && ( opposite_colors(weakKing, strongBishop) - || relative_rank(strongSide, weakKing) <= RANK_6)) - return SCALE_FACTOR_DRAW; - - return SCALE_FACTOR_NONE; -} - - -/// KP vs KP. This is done by removing the weakest side's pawn and probing the -/// KP vs K bitbase: if the weakest side has a draw without the pawn, it probably -/// has at least a draw with the pawn as well. The exception is when the stronger -/// side's pawn is far advanced and not on a rook file; in this case it is often -/// possible to win (e.g. 8/4k3/3p4/3P4/6K1/8/8/8 w - - 0 1). -template<> -ScaleFactor Endgame::operator()(const Position& pos) const { - - assert(verify_material(pos, strongSide, VALUE_ZERO, 1)); - assert(verify_material(pos, weakSide, VALUE_ZERO, 1)); - - // Assume strongSide is white and the pawn is on files A-D - Square strongKing = normalize(pos, strongSide, pos.square(strongSide)); - Square weakKing = normalize(pos, strongSide, pos.square(weakSide)); - Square strongPawn = normalize(pos, strongSide, pos.square(strongSide)); - - Color us = strongSide == pos.side_to_move() ? WHITE : BLACK; - - // If the pawn has advanced to the fifth rank or further, and is not a - // rook pawn, it's too dangerous to assume that it's at least a draw. - if (rank_of(strongPawn) >= RANK_5 && file_of(strongPawn) != FILE_A) - return SCALE_FACTOR_NONE; - - // Probe the KPK bitbase with the weakest side's pawn removed. If it's a draw, - // it's probably at least a draw even with the pawn. - return Bitbases::probe(strongKing, strongPawn, weakKing, us) ? SCALE_FACTOR_NONE : SCALE_FACTOR_DRAW; -} - -} // namespace Stockfish diff --git a/src/endgame.h b/src/endgame.h deleted file mode 100644 index e79f696f..00000000 --- a/src/endgame.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef ENDGAME_H_INCLUDED -#define ENDGAME_H_INCLUDED - -#include -#include -#include -#include -#include - -#include "position.h" -#include "types.h" - -namespace Stockfish { - -/// EndgameCode lists all supported endgame functions by corresponding codes - -enum EndgameCode { - - EVALUATION_FUNCTIONS, - KNNK, // KNN vs K - KNNKP, // KNN vs KP - KXK, // Generic "mate lone king" eval - KBNK, // KBN vs K - KPK, // KP vs K - KRKP, // KR vs KP - KRKB, // KR vs KB - KRKN, // KR vs KN - KQKP, // KQ vs KP - KQKR, // KQ vs KR - - SCALING_FUNCTIONS, - KBPsK, // KB and pawns vs K - KQKRPs, // KQ vs KR and pawns - KRPKR, // KRP vs KR - KRPKB, // KRP vs KB - KRPPKRP, // KRPP vs KRP - KPsK, // K and pawns vs K - KBPKB, // KBP vs KB - KBPPKB, // KBPP vs KB - KBPKN, // KBP vs KN - KPKP // KP vs KP -}; - - -/// Endgame functions can be of two types depending on whether they return a -/// Value or a ScaleFactor. - -template using -eg_type = typename std::conditional<(E < SCALING_FUNCTIONS), Value, ScaleFactor>::type; - - -/// Base and derived functors for endgame evaluation and scaling functions - -template -struct EndgameBase { - - explicit EndgameBase(Color c) : strongSide(c), weakSide(~c) {} - virtual ~EndgameBase() = default; - virtual T operator()(const Position&) const = 0; - - const Color strongSide, weakSide; -}; - - -template> -struct Endgame : public EndgameBase { - - explicit Endgame(Color c) : EndgameBase(c) {} - T operator()(const Position&) const override; -}; - - -/// The Endgames namespace handles the pointers to endgame evaluation and scaling -/// base objects in two std::map. We use polymorphism to invoke the actual -/// endgame function by calling its virtual operator(). - -namespace Endgames { - - template using Ptr = std::unique_ptr>; - template using Map = std::unordered_map>; - - extern std::pair, Map> maps; - - void init(); - - template - Map& map() { - return std::get::value>(maps); - } - - template> - void add(const std::string& code) { - - StateInfo st; - map()[Position().set(code, WHITE, &st).material_key()] = Ptr(new Endgame(WHITE)); - map()[Position().set(code, BLACK, &st).material_key()] = Ptr(new Endgame(BLACK)); - } - - template - const EndgameBase* probe(Key key) { - auto it = map().find(key); - return it != map().end() ? it->second.get() : nullptr; - } -} - -} // namespace Stockfish - -#endif // #ifndef ENDGAME_H_INCLUDED diff --git a/src/engine.cpp b/src/engine.cpp new file mode 100644 index 00000000..e8da24aa --- /dev/null +++ b/src/engine.cpp @@ -0,0 +1,187 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "engine.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "evaluate.h" +#include "misc.h" +#include "nnue/network.h" +#include "nnue/nnue_common.h" +#include "perft.h" +#include "position.h" +#include "search.h" +#include "syzygy/tbprobe.h" +#include "types.h" +#include "uci.h" +#include "ucioption.h" + +namespace Stockfish { + +namespace NN = Eval::NNUE; + +constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; + +Engine::Engine(std::string path) : + binaryDirectory(CommandLine::get_binary_directory(path)), + states(new std::deque(1)), + networks(NN::Networks( + NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG), + NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) { + pos.set(StartFEN, false, &states->back()); + capSq = SQ_NONE; +} + +std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) { + verify_networks(); + + return Benchmark::perft(fen, depth, isChess960); +} + +void Engine::go(Search::LimitsType& limits) { + assert(limits.perft == 0); + verify_networks(); + limits.capSq = capSq; + + threads.start_thinking(options, pos, states, limits); +} +void Engine::stop() { threads.stop = true; } + +void Engine::search_clear() { + wait_for_search_finished(); + + tt.clear(options["Threads"]); + threads.clear(); + + // @TODO wont work with multiple instances + Tablebases::init(options["SyzygyPath"]); // Free mapped files +} + +void Engine::set_on_update_no_moves(std::function&& f) { + updateContext.onUpdateNoMoves = std::move(f); +} + +void Engine::set_on_update_full(std::function&& f) { + updateContext.onUpdateFull = std::move(f); +} + +void Engine::set_on_iter(std::function&& f) { + updateContext.onIter = std::move(f); +} + +void Engine::set_on_bestmove(std::function&& f) { + updateContext.onBestmove = std::move(f); +} + +void Engine::wait_for_search_finished() { threads.main_thread()->wait_for_search_finished(); } + +void Engine::set_position(const std::string& fen, const std::vector& moves) { + // Drop the old state and create a new one + states = StateListPtr(new std::deque(1)); + pos.set(fen, options["UCI_Chess960"], &states->back()); + + capSq = SQ_NONE; + for (const auto& move : moves) + { + auto m = UCIEngine::to_move(pos, move); + + if (m == Move::none()) + break; + + states->emplace_back(); + pos.do_move(m, states->back()); + + capSq = SQ_NONE; + DirtyPiece& dp = states->back().dirtyPiece; + if (dp.dirty_num > 1 && dp.to[1] == SQ_NONE) + capSq = m.to_sq(); + } +} + +// modifiers + +void Engine::resize_threads() { threads.set({options, threads, tt, networks}, updateContext); } + +void Engine::set_tt_size(size_t mb) { + wait_for_search_finished(); + tt.resize(mb, options["Threads"]); +} + +void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; } + +// network related + +void Engine::verify_networks() const { + networks.big.verify(options["EvalFile"]); + networks.small.verify(options["EvalFileSmall"]); +} + +void Engine::load_networks() { + load_big_network(options["EvalFile"]); + load_small_network(options["EvalFileSmall"]); +} + +void Engine::load_big_network(const std::string& file) { + networks.big.load(binaryDirectory, file); + threads.clear(); +} + +void Engine::load_small_network(const std::string& file) { + networks.small.load(binaryDirectory, file); + threads.clear(); +} + +void Engine::save_network(const std::pair, std::string> files[2]) { + networks.big.save(files[0].first); + networks.small.save(files[1].first); +} + +// utility functions + +void Engine::trace_eval() const { + StateListPtr trace_states(new std::deque(1)); + Position p; + p.set(pos.fen(), options["UCI_Chess960"], &trace_states->back()); + + verify_networks(); + + sync_cout << "\n" << Eval::trace(p, networks) << sync_endl; +} + +OptionsMap& Engine::get_options() { return options; } + +std::string Engine::fen() const { return pos.fen(); } + +void Engine::flip() { pos.flip(); } + +std::string Engine::visualize() const { + std::stringstream ss; + ss << pos; + return ss.str(); +} + +} diff --git a/src/engine.h b/src/engine.h new file mode 100644 index 00000000..64a814cb --- /dev/null +++ b/src/engine.h @@ -0,0 +1,110 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef ENGINE_H_INCLUDED +#define ENGINE_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nnue/network.h" +#include "position.h" +#include "search.h" +#include "syzygy/tbprobe.h" // for Stockfish::Depth +#include "thread.h" +#include "tt.h" +#include "ucioption.h" + +namespace Stockfish { + +enum Square : int; + +class Engine { + public: + using InfoShort = Search::InfoShort; + using InfoFull = Search::InfoFull; + using InfoIter = Search::InfoIteration; + + Engine(std::string path = ""); + ~Engine() { wait_for_search_finished(); } + + std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960); + + // non blocking call to start searching + void go(Search::LimitsType&); + // non blocking call to stop searching + void stop(); + + // blocking call to wait for search to finish + void wait_for_search_finished(); + // set a new position, moves are in UCI format + void set_position(const std::string& fen, const std::vector& moves); + + // modifiers + + void resize_threads(); + void set_tt_size(size_t mb); + void set_ponderhit(bool); + void search_clear(); + + void set_on_update_no_moves(std::function&&); + void set_on_update_full(std::function&&); + void set_on_iter(std::function&&); + void set_on_bestmove(std::function&&); + + // network related + + void verify_networks() const; + void load_networks(); + void load_big_network(const std::string& file); + void load_small_network(const std::string& file); + void save_network(const std::pair, std::string> files[2]); + + // utility functions + + void trace_eval() const; + OptionsMap& get_options(); + std::string fen() const; + void flip(); + std::string visualize() const; + + private: + const std::string binaryDirectory; + + Position pos; + StateListPtr states; + Square capSq; + + OptionsMap options; + ThreadPool threads; + TranspositionTable tt; + Eval::NNUE::Networks networks; + + Search::SearchManager::UpdateContext updateContext; +}; + +} // namespace Stockfish + + +#endif // #ifndef ENGINE_H_INCLUDED diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 510931c6..98d3c216 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,1182 +16,110 @@ along with this program. If not, see . */ +#include "evaluate.h" + #include #include +#include #include -#include // For std::memset -#include #include -#include #include -#include -#include +#include +#include -#include "bitboard.h" -#include "evaluate.h" -#include "material.h" -#include "misc.h" -#include "pawns.h" -#include "thread.h" -#include "timeman.h" +#include "nnue/network.h" +#include "nnue/nnue_misc.h" +#include "position.h" +#include "types.h" #include "uci.h" -#include "incbin/incbin.h" - - -// Macro to embed the default efficiently updatable neural network (NNUE) file -// data in the engine binary (using incbin.h, by Dale Weiler). -// This macro invocation will declare the following three variables -// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data -// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end -// const unsigned int gEmbeddedNNUESize; // the size of the embedded file -// Note that this does not work in Microsoft Visual Studio. -#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) - INCBIN(EmbeddedNNUE, EvalFileDefaultName); -#else - const unsigned char gEmbeddedNNUEData[1] = {0x0}; - const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1]; - const unsigned int gEmbeddedNNUESize = 1; -#endif - - -using namespace std; +#include "nnue/nnue_accumulator.h" namespace Stockfish { -namespace Eval { - - bool useNNUE; - string currentEvalFileName = "None"; - - /// NNUE::init() tries to load a NNUE network at startup time, or when the engine - /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" - /// The name of the NNUE network is always retrieved from the EvalFile option. - /// We search the given network in three locations: internally (the default - /// network may be embedded in the binary), in the active working directory and - /// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY - /// variable to have the engine search in a special directory in their distro. - - void NNUE::init() { - - useNNUE = Options["Use NNUE"]; - if (!useNNUE) - return; - - string eval_file = string(Options["EvalFile"]); - if (eval_file.empty()) - eval_file = EvalFileDefaultName; - - #if defined(DEFAULT_NNUE_DIRECTORY) - #define stringify2(x) #x - #define stringify(x) stringify2(x) - vector dirs = { "" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; - #else - vector dirs = { "" , "" , CommandLine::binaryDirectory }; - #endif - - for (string directory : dirs) - if (currentEvalFileName != eval_file) - { - if (directory != "") - { - ifstream stream(directory + eval_file, ios::binary); - if (load_eval(eval_file, stream)) - currentEvalFileName = eval_file; - } - - if (directory == "" && eval_file == EvalFileDefaultName) - { - // C++ way to prepare a buffer for a memory stream - class MemoryBuffer : public basic_streambuf { - public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); } - }; - - MemoryBuffer buffer(const_cast(reinterpret_cast(gEmbeddedNNUEData)), - size_t(gEmbeddedNNUESize)); - (void) gEmbeddedNNUEEnd; // Silence warning on unused variable - - istream stream(&buffer); - if (load_eval(eval_file, stream)) - currentEvalFileName = eval_file; - } - } - } - - /// NNUE::verify() verifies that the last net used was loaded successfully - void NNUE::verify() { - - string eval_file = string(Options["EvalFile"]); - if (eval_file.empty()) - eval_file = EvalFileDefaultName; - - if (useNNUE && currentEvalFileName != eval_file) - { - - string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; - string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; - string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; - string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(EvalFileDefaultName); - string msg5 = "The engine will be terminated now."; - - sync_cout << "info string ERROR: " << msg1 << sync_endl; - sync_cout << "info string ERROR: " << msg2 << sync_endl; - sync_cout << "info string ERROR: " << msg3 << sync_endl; - sync_cout << "info string ERROR: " << msg4 << sync_endl; - sync_cout << "info string ERROR: " << msg5 << sync_endl; - - exit(EXIT_FAILURE); - } - - if (useNNUE) - sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl; - else - sync_cout << "info string classical evaluation enabled" << sync_endl; - } +// Returns a static, purely materialistic evaluation of the position from +// the point of view of the given color. It can be divided by PawnValue to get +// an approximation of the material advantage on the board in terms of pawns. +int Eval::simple_eval(const Position& pos, Color c) { + return PawnValue * (pos.count(c) - pos.count(~c)) + + (pos.non_pawn_material(c) - pos.non_pawn_material(~c)); } -namespace Trace { - enum Tracing { NO_TRACE, TRACE }; - - enum Term { // The first 8 entries are reserved for PieceType - MATERIAL = 8, IMBALANCE, MOBILITY, THREAT, PASSED, SPACE, WINNABLE, TOTAL, TERM_NB - }; - - Score scores[TERM_NB][COLOR_NB]; - - double to_cp(Value v) { return double(v) / PawnValueEg; } - - void add(int idx, Color c, Score s) { - scores[idx][c] = s; - } - - void add(int idx, Score w, Score b = SCORE_ZERO) { - scores[idx][WHITE] = w; - scores[idx][BLACK] = b; - } - - std::ostream& operator<<(std::ostream& os, Score s) { - os << std::setw(5) << to_cp(mg_value(s)) << " " - << std::setw(5) << to_cp(eg_value(s)); - return os; - } - - std::ostream& operator<<(std::ostream& os, Term t) { - - if (t == MATERIAL || t == IMBALANCE || t == WINNABLE || t == TOTAL) - os << " ---- ----" << " | " << " ---- ----"; - else - os << scores[t][WHITE] << " | " << scores[t][BLACK]; - - os << " | " << scores[t][WHITE] - scores[t][BLACK] << " |\n"; - return os; - } -} - -using namespace Trace; - -namespace { - - // Threshold for lazy and space evaluation - constexpr Value LazyThreshold1 = Value(3631); - constexpr Value LazyThreshold2 = Value(2084); - constexpr Value SpaceThreshold = Value(11551); - - // KingAttackWeights[PieceType] contains king attack weights by piece type - constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; - - // SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type, - // higher if multiple safe checks are possible for that piece type. - constexpr int SafeCheck[][2] = { - {}, {}, {803, 1292}, {639, 974}, {1087, 1878}, {759, 1132} - }; - -#define S(mg, eg) make_score(mg, eg) - - // MobilityBonus[PieceType-2][attacked] contains bonuses for middle and end game, - // indexed by piece type and number of attacked squares in the mobility area. - constexpr Score MobilityBonus[][32] = { - { S(-62,-79), S(-53,-57), S(-12,-31), S( -3,-17), S( 3, 7), S( 12, 13), // Knight - S( 21, 16), S( 28, 21), S( 37, 26) }, - { S(-47,-59), S(-20,-25), S( 14, -8), S( 29, 12), S( 39, 21), S( 53, 40), // Bishop - S( 53, 56), S( 60, 58), S( 62, 65), S( 69, 72), S( 78, 78), S( 83, 87), - S( 91, 88), S( 96, 98) }, - { S(-60,-82), S(-24,-15), S( 0, 17) ,S( 3, 43), S( 4, 72), S( 14,100), // Rook - S( 20,102), S( 30,122), S( 41,133), S(41 ,139), S( 41,153), S( 45,160), - S( 57,165), S( 58,170), S( 67,175) }, - { S(-29,-49), S(-16,-29), S( -8, -8), S( -8, 17), S( 18, 39), S( 25, 54), // Queen - S( 23, 59), S( 37, 73), S( 41, 76), S( 54, 95), S( 65, 95) ,S( 68,101), - S( 69,124), S( 70,128), S( 70,132), S( 70,133) ,S( 71,136), S( 72,140), - S( 74,147), S( 76,149), S( 90,153), S(104,169), S(105,171), S(106,171), - S(112,178), S(114,185), S(114,187), S(119,221) } - }; - - // BishopPawns[distance from edge] contains a file-dependent penalty for pawns on - // squares of the same color as our bishop. - constexpr Score BishopPawns[int(FILE_NB) / 2] = { - S(3, 8), S(3, 9), S(2, 8), S(3, 8) - }; - - // KingProtector[knight/bishop] contains penalty for each distance unit to own king - constexpr Score KingProtector[] = { S(8, 9), S(6, 9) }; - - // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a - // pawn protected square on rank 4 to 6 which is also safe from a pawn attack. - constexpr Score Outpost[] = { S(57, 38), S(31, 24) }; - - // PassedRank[Rank] contains a bonus according to the rank of a passed pawn - constexpr Score PassedRank[RANK_NB] = { - S(0, 0), S(7, 27), S(16, 32), S(17, 40), S(64, 71), S(170, 174), S(278, 262) - }; - - constexpr Score RookOnClosedFile = S(10, 5); - constexpr Score RookOnOpenFile[] = { S(19, 6), S(47, 26) }; - - // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to - // which piece type attacks which one. Attacks on lesser pieces which are - // pawn-defended are not considered. - constexpr Score ThreatByMinor[PIECE_TYPE_NB] = { - S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162) - }; - - constexpr Score ThreatByRook[PIECE_TYPE_NB] = { - S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43) - }; - - constexpr Value CorneredBishop = Value(50); - - // Assorted bonuses and penalties - constexpr Score UncontestedOutpost = S( 1, 10); - constexpr Score BishopOnKingRing = S( 24, 0); - constexpr Score BishopXRayPawns = S( 4, 5); - constexpr Score FlankAttacks = S( 8, 0); - constexpr Score Hanging = S( 69, 36); - constexpr Score KnightOnQueen = S( 16, 11); - constexpr Score LongDiagonalBishop = S( 45, 0); - constexpr Score MinorBehindPawn = S( 18, 3); - constexpr Score PassedFile = S( 11, 8); - constexpr Score PawnlessFlank = S( 17, 95); - constexpr Score ReachableOutpost = S( 31, 22); - constexpr Score RestrictedPiece = S( 7, 7); - constexpr Score RookOnKingRing = S( 16, 0); - constexpr Score SliderOnQueen = S( 60, 18); - constexpr Score ThreatByKing = S( 24, 89); - constexpr Score ThreatByPawnPush = S( 48, 39); - constexpr Score ThreatBySafePawn = S(173, 94); - constexpr Score TrappedRook = S( 55, 13); - constexpr Score WeakQueenProtection = S( 14, 0); - constexpr Score WeakQueen = S( 56, 15); - - -#undef S - - // Evaluation class computes and stores attacks tables and other working data - template - class Evaluation { - - public: - Evaluation() = delete; - explicit Evaluation(const Position& p) : pos(p) {} - Evaluation& operator=(const Evaluation&) = delete; - Value value(); - - private: - template void initialize(); - template Score pieces(); - template Score king() const; - template Score threats() const; - template Score passed() const; - template Score space() const; - Value winnable(Score score) const; - - const Position& pos; - Material::Entry* me; - Pawns::Entry* pe; - Bitboard mobilityArea[COLOR_NB]; - Score mobility[COLOR_NB] = { SCORE_ZERO, SCORE_ZERO }; - - // attackedBy[color][piece type] is a bitboard representing all squares - // attacked by a given color and piece type. Special "piece types" which - // is also calculated is ALL_PIECES. - Bitboard attackedBy[COLOR_NB][PIECE_TYPE_NB]; - - // attackedBy2[color] are the squares attacked by at least 2 units of a given - // color, including x-rays. But diagonal x-rays through pawns are not computed. - Bitboard attackedBy2[COLOR_NB]; - - // kingRing[color] are the squares adjacent to the king plus some other - // very near squares, depending on king position. - Bitboard kingRing[COLOR_NB]; - - // kingAttackersCount[color] is the number of pieces of the given color - // which attack a square in the kingRing of the enemy king. - int kingAttackersCount[COLOR_NB]; - - // kingAttackersWeight[color] is the sum of the "weights" of the pieces of - // the given color which attack a square in the kingRing of the enemy king. - // The weights of the individual piece types are given by the elements in - // the KingAttackWeights array. - int kingAttackersWeight[COLOR_NB]; - - // kingAttacksCount[color] is the number of attacks by the given color to - // squares directly adjacent to the enemy king. Pieces which attack more - // than one square are counted multiple times. For instance, if there is - // a white knight on g5 and black's king is on g8, this white knight adds 2 - // to kingAttacksCount[WHITE]. - int kingAttacksCount[COLOR_NB]; - }; - - - // Evaluation::initialize() computes king and pawn attacks, and the king ring - // bitboard for a given color. This is done at the beginning of the evaluation. - - template template - void Evaluation::initialize() { - - constexpr Color Them = ~Us; - constexpr Direction Up = pawn_push(Us); - constexpr Direction Down = -Up; - constexpr Bitboard LowRanks = (Us == WHITE ? Rank2BB | Rank3BB : Rank7BB | Rank6BB); - - const Square ksq = pos.square(Us); - - Bitboard dblAttackByPawn = pawn_double_attacks_bb(pos.pieces(Us, PAWN)); - - // Find our pawns that are blocked or on the first two ranks - Bitboard b = pos.pieces(Us, PAWN) & (shift(pos.pieces()) | LowRanks); - - // Squares occupied by those pawns, by our king or queen, by blockers to attacks on our king - // or controlled by enemy pawns are excluded from the mobility area. - mobilityArea[Us] = ~(b | pos.pieces(Us, KING, QUEEN) | pos.blockers_for_king(Us) | pe->pawn_attacks(Them)); - - // Initialize attackedBy[] for king and pawns - attackedBy[Us][KING] = attacks_bb(ksq); - attackedBy[Us][PAWN] = pe->pawn_attacks(Us); - attackedBy[Us][ALL_PIECES] = attackedBy[Us][KING] | attackedBy[Us][PAWN]; - attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]); - - // Init our king safety tables - Square s = make_square(std::clamp(file_of(ksq), FILE_B, FILE_G), - std::clamp(rank_of(ksq), RANK_2, RANK_7)); - kingRing[Us] = attacks_bb(s) | s; - - kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them)); - kingAttacksCount[Them] = kingAttackersWeight[Them] = 0; - - // Remove from kingRing[] the squares defended by two pawns - kingRing[Us] &= ~dblAttackByPawn; - } - - - // Evaluation::pieces() scores pieces of a given color and type - - template template - Score Evaluation::pieces() { - - constexpr Color Them = ~Us; - constexpr Direction Down = -pawn_push(Us); - constexpr Bitboard OutpostRanks = (Us == WHITE ? Rank4BB | Rank5BB | Rank6BB - : Rank5BB | Rank4BB | Rank3BB); - Bitboard b1 = pos.pieces(Us, Pt); - Bitboard b, bb; - Score score = SCORE_ZERO; - - attackedBy[Us][Pt] = 0; - - while (b1) - { - Square s = pop_lsb(b1); - - // Find attacked squares, including x-ray attacks for bishops and rooks - b = Pt == BISHOP ? attacks_bb(s, pos.pieces() ^ pos.pieces(QUEEN)) - : Pt == ROOK ? attacks_bb< ROOK>(s, pos.pieces() ^ pos.pieces(QUEEN) ^ pos.pieces(Us, ROOK)) - : attacks_bb(s, pos.pieces()); - - if (pos.blockers_for_king(Us) & s) - b &= line_bb(pos.square(Us), s); - - attackedBy2[Us] |= attackedBy[Us][ALL_PIECES] & b; - attackedBy[Us][Pt] |= b; - attackedBy[Us][ALL_PIECES] |= b; - - if (b & kingRing[Them]) - { - kingAttackersCount[Us]++; - kingAttackersWeight[Us] += KingAttackWeights[Pt]; - kingAttacksCount[Us] += popcount(b & attackedBy[Them][KING]); - } - - else if (Pt == ROOK && (file_bb(s) & kingRing[Them])) - score += RookOnKingRing; - - else if (Pt == BISHOP && (attacks_bb(s, pos.pieces(PAWN)) & kingRing[Them])) - score += BishopOnKingRing; - - int mob = popcount(b & mobilityArea[Us]); - mobility[Us] += MobilityBonus[Pt - 2][mob]; - - if (Pt == BISHOP || Pt == KNIGHT) - { - // Bonus if the piece is on an outpost square or can reach one - // Bonus for knights (UncontestedOutpost) if few relevant targets - bb = OutpostRanks & (attackedBy[Us][PAWN] | shift(pos.pieces(PAWN))) - & ~pe->pawn_attacks_span(Them); - Bitboard targets = pos.pieces(Them) & ~pos.pieces(PAWN); - - if ( Pt == KNIGHT - && bb & s & ~CenterFiles // on a side outpost - && !(b & targets) // no relevant attacks - && (!more_than_one(targets & (s & QueenSide ? QueenSide : KingSide)))) - score += UncontestedOutpost * popcount(pos.pieces(PAWN) & (s & QueenSide ? QueenSide : KingSide)); - else if (bb & s) - score += Outpost[Pt == BISHOP]; - else if (Pt == KNIGHT && bb & b & ~pos.pieces(Us)) - score += ReachableOutpost; - - // Bonus for a knight or bishop shielded by pawn - if (shift(pos.pieces(PAWN)) & s) - score += MinorBehindPawn; - - // Penalty if the piece is far from the king - score -= KingProtector[Pt == BISHOP] * distance(pos.square(Us), s); - - if constexpr (Pt == BISHOP) - { - // Penalty according to the number of our pawns on the same color square as the - // bishop, bigger when the center files are blocked with pawns and smaller - // when the bishop is outside the pawn chain. - Bitboard blocked = pos.pieces(Us, PAWN) & shift(pos.pieces()); - - score -= BishopPawns[edge_distance(file_of(s))] * pos.pawns_on_same_color_squares(Us, s) - * (!(attackedBy[Us][PAWN] & s) + popcount(blocked & CenterFiles)); - - // Penalty for all enemy pawns x-rayed - score -= BishopXRayPawns * popcount(attacks_bb(s) & pos.pieces(Them, PAWN)); - - // Bonus for bishop on a long diagonal which can "see" both center squares - if (more_than_one(attacks_bb(s, pos.pieces(PAWN)) & Center)) - score += LongDiagonalBishop; - - // An important Chess960 pattern: a cornered bishop blocked by a friendly - // pawn diagonally in front of it is a very serious problem, especially - // when that pawn is also blocked. - if ( pos.is_chess960() - && (s == relative_square(Us, SQ_A1) || s == relative_square(Us, SQ_H1))) - { - Direction d = pawn_push(Us) + (file_of(s) == FILE_A ? EAST : WEST); - if (pos.piece_on(s + d) == make_piece(Us, PAWN)) - score -= !pos.empty(s + d + pawn_push(Us)) ? 4 * make_score(CorneredBishop, CorneredBishop) - : 3 * make_score(CorneredBishop, CorneredBishop); - } - } - } - - if constexpr (Pt == ROOK) - { - // Bonuses for rook on a (semi-)open or closed file - if (pos.is_on_semiopen_file(Us, s)) - { - score += RookOnOpenFile[pos.is_on_semiopen_file(Them, s)]; - } - else - { - // If our pawn on this file is blocked, increase penalty - if ( pos.pieces(Us, PAWN) - & shift(pos.pieces()) - & file_bb(s)) - { - score -= RookOnClosedFile; - } - - // Penalty when trapped by the king, even more if the king cannot castle - if (mob <= 3) - { - File kf = file_of(pos.square(Us)); - if ((kf < FILE_E) == (file_of(s) < kf)) - score -= TrappedRook * (1 + !pos.castling_rights(Us)); - } - } - } - - if constexpr (Pt == QUEEN) - { - // Penalty if any relative pin or discovered attack against the queen - Bitboard queenPinners; - if (pos.slider_blockers(pos.pieces(Them, ROOK, BISHOP), s, queenPinners)) - score -= WeakQueen; - } - } - if constexpr (T) - Trace::add(Pt, Us, score); - - return score; - } - - - // Evaluation::king() assigns bonuses and penalties to a king of a given color - - template template - Score Evaluation::king() const { - - constexpr Color Them = ~Us; - constexpr Bitboard Camp = (Us == WHITE ? AllSquares ^ Rank6BB ^ Rank7BB ^ Rank8BB - : AllSquares ^ Rank1BB ^ Rank2BB ^ Rank3BB); - - Bitboard weak, b1, b2, b3, safe, unsafeChecks = 0; - Bitboard rookChecks, queenChecks, bishopChecks, knightChecks; - int kingDanger = 0; - const Square ksq = pos.square(Us); - - // Init the score with king shelter and enemy pawns storm - Score score = pe->king_safety(pos); - - // Attacked squares defended at most once by our queen or king - weak = attackedBy[Them][ALL_PIECES] - & ~attackedBy2[Us] - & (~attackedBy[Us][ALL_PIECES] | attackedBy[Us][KING] | attackedBy[Us][QUEEN]); - - // Analyse the safe enemy's checks which are possible on next move - safe = ~pos.pieces(Them); - safe &= ~attackedBy[Us][ALL_PIECES] | (weak & attackedBy2[Them]); - - b1 = attacks_bb(ksq, pos.pieces() ^ pos.pieces(Us, QUEEN)); - b2 = attacks_bb(ksq, pos.pieces() ^ pos.pieces(Us, QUEEN)); - - // Enemy rooks checks - rookChecks = b1 & attackedBy[Them][ROOK] & safe; - if (rookChecks) - kingDanger += SafeCheck[ROOK][more_than_one(rookChecks)]; - else - unsafeChecks |= b1 & attackedBy[Them][ROOK]; - - // Enemy queen safe checks: count them only if the checks are from squares from - // which opponent cannot give a rook check, because rook checks are more valuable. - queenChecks = (b1 | b2) & attackedBy[Them][QUEEN] & safe - & ~(attackedBy[Us][QUEEN] | rookChecks); - if (queenChecks) - kingDanger += SafeCheck[QUEEN][more_than_one(queenChecks)]; - - // Enemy bishops checks: count them only if they are from squares from which - // opponent cannot give a queen check, because queen checks are more valuable. - bishopChecks = b2 & attackedBy[Them][BISHOP] & safe - & ~queenChecks; - if (bishopChecks) - kingDanger += SafeCheck[BISHOP][more_than_one(bishopChecks)]; - - else - unsafeChecks |= b2 & attackedBy[Them][BISHOP]; - - // Enemy knights checks - knightChecks = attacks_bb(ksq) & attackedBy[Them][KNIGHT]; - if (knightChecks & safe) - kingDanger += SafeCheck[KNIGHT][more_than_one(knightChecks & safe)]; - else - unsafeChecks |= knightChecks; - - // Find the squares that opponent attacks in our king flank, the squares - // which they attack twice in that flank, and the squares that we defend. - b1 = attackedBy[Them][ALL_PIECES] & KingFlank[file_of(ksq)] & Camp; - b2 = b1 & attackedBy2[Them]; - b3 = attackedBy[Us][ALL_PIECES] & KingFlank[file_of(ksq)] & Camp; - - int kingFlankAttack = popcount(b1) + popcount(b2); - int kingFlankDefense = popcount(b3); - - kingDanger += kingAttackersCount[Them] * kingAttackersWeight[Them] // (~10 Elo) - + 183 * popcount(kingRing[Us] & weak) // (~15 Elo) - + 148 * popcount(unsafeChecks) // (~4 Elo) - + 98 * popcount(pos.blockers_for_king(Us)) // (~2 Elo) - + 69 * kingAttacksCount[Them] // (~0.5 Elo) - + 3 * kingFlankAttack * kingFlankAttack / 8 // (~0.5 Elo) - + mg_value(mobility[Them] - mobility[Us]) // (~0.5 Elo) - - 873 * !pos.count(Them) // (~24 Elo) - - 100 * bool(attackedBy[Us][KNIGHT] & attackedBy[Us][KING]) // (~5 Elo) - - 6 * mg_value(score) / 8 // (~8 Elo) - - 4 * kingFlankDefense // (~5 Elo) - + 37; // (~0.5 Elo) - - // Transform the kingDanger units into a Score, and subtract it from the evaluation - if (kingDanger > 100) - score -= make_score(kingDanger * kingDanger / 4096, kingDanger / 16); - - // Penalty when our king is on a pawnless flank - if (!(pos.pieces(PAWN) & KingFlank[file_of(ksq)])) - score -= PawnlessFlank; - - // Penalty if king flank is under attack, potentially moving toward the king - score -= FlankAttacks * kingFlankAttack; - - if constexpr (T) - Trace::add(KING, Us, score); - - return score; - } - - - // Evaluation::threats() assigns bonuses according to the types of the - // attacking and the attacked pieces. - - template template - Score Evaluation::threats() const { - - constexpr Color Them = ~Us; - constexpr Direction Up = pawn_push(Us); - constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB); - - Bitboard b, weak, defended, nonPawnEnemies, stronglyProtected, safe; - Score score = SCORE_ZERO; - - // Non-pawn enemies - nonPawnEnemies = pos.pieces(Them) & ~pos.pieces(PAWN); - - // Squares strongly protected by the enemy, either because they defend the - // square with a pawn, or because they defend the square twice and we don't. - stronglyProtected = attackedBy[Them][PAWN] - | (attackedBy2[Them] & ~attackedBy2[Us]); - - // Non-pawn enemies, strongly protected - defended = nonPawnEnemies & stronglyProtected; - - // Enemies not strongly protected and under our attack - weak = pos.pieces(Them) & ~stronglyProtected & attackedBy[Us][ALL_PIECES]; - - // Bonus according to the kind of attacking pieces - if (defended | weak) - { - b = (defended | weak) & (attackedBy[Us][KNIGHT] | attackedBy[Us][BISHOP]); - while (b) - score += ThreatByMinor[type_of(pos.piece_on(pop_lsb(b)))]; - - b = weak & attackedBy[Us][ROOK]; - while (b) - score += ThreatByRook[type_of(pos.piece_on(pop_lsb(b)))]; - - if (weak & attackedBy[Us][KING]) - score += ThreatByKing; - - b = ~attackedBy[Them][ALL_PIECES] - | (nonPawnEnemies & attackedBy2[Us]); - score += Hanging * popcount(weak & b); - - // Additional bonus if weak piece is only protected by a queen - score += WeakQueenProtection * popcount(weak & attackedBy[Them][QUEEN]); - } - - // Bonus for restricting their piece moves - b = attackedBy[Them][ALL_PIECES] - & ~stronglyProtected - & attackedBy[Us][ALL_PIECES]; - score += RestrictedPiece * popcount(b); - - // Protected or unattacked squares - safe = ~attackedBy[Them][ALL_PIECES] | attackedBy[Us][ALL_PIECES]; - - // Bonus for attacking enemy pieces with our relatively safe pawns - b = pos.pieces(Us, PAWN) & safe; - b = pawn_attacks_bb(b) & nonPawnEnemies; - score += ThreatBySafePawn * popcount(b); - - // Find squares where our pawns can push on the next move - b = shift(pos.pieces(Us, PAWN)) & ~pos.pieces(); - b |= shift(b & TRank3BB) & ~pos.pieces(); - - // Keep only the squares which are relatively safe - b &= ~attackedBy[Them][PAWN] & safe; - - // Bonus for safe pawn threats on the next move - b = pawn_attacks_bb(b) & nonPawnEnemies; - score += ThreatByPawnPush * popcount(b); - - // Bonus for threats on the next moves against enemy queen - if (pos.count(Them) == 1) - { - bool queenImbalance = pos.count() == 1; - - Square s = pos.square(Them); - safe = mobilityArea[Us] - & ~pos.pieces(Us, PAWN) - & ~stronglyProtected; - - b = attackedBy[Us][KNIGHT] & attacks_bb(s); - - score += KnightOnQueen * popcount(b & safe) * (1 + queenImbalance); - - b = (attackedBy[Us][BISHOP] & attacks_bb(s, pos.pieces())) - | (attackedBy[Us][ROOK ] & attacks_bb(s, pos.pieces())); - - score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]) * (1 + queenImbalance); - } - - if constexpr (T) - Trace::add(THREAT, Us, score); - - return score; - } - - // Evaluation::passed() evaluates the passed pawns and candidate passed - // pawns of the given color. - - template template - Score Evaluation::passed() const { - - constexpr Color Them = ~Us; - constexpr Direction Up = pawn_push(Us); - constexpr Direction Down = -Up; - - auto king_proximity = [&](Color c, Square s) { - return std::min(distance(pos.square(c), s), 5); - }; - - Bitboard b, bb, squaresToQueen, unsafeSquares, blockedPassers, helpers; - Score score = SCORE_ZERO; - - b = pe->passed_pawns(Us); - - blockedPassers = b & shift(pos.pieces(Them, PAWN)); - if (blockedPassers) - { - helpers = shift(pos.pieces(Us, PAWN)) - & ~pos.pieces(Them) - & (~attackedBy2[Them] | attackedBy[Us][ALL_PIECES]); - - // Remove blocked candidate passers that don't have help to pass - b &= ~blockedPassers - | shift(helpers) - | shift(helpers); - } - - while (b) - { - Square s = pop_lsb(b); - - assert(!(pos.pieces(Them, PAWN) & forward_file_bb(Us, s + Up))); - - int r = relative_rank(Us, s); - - Score bonus = PassedRank[r]; - - if (r > RANK_3) - { - int w = 5 * r - 13; - Square blockSq = s + Up; - - // Adjust bonus based on the king's proximity - bonus += make_score(0, ( king_proximity(Them, blockSq) * 19 / 4 - - king_proximity(Us, blockSq) * 2) * w); - - // If blockSq is not the queening square then consider also a second push - if (r != RANK_7) - bonus -= make_score(0, king_proximity(Us, blockSq + Up) * w); - - // If the pawn is free to advance, then increase the bonus - if (pos.empty(blockSq)) - { - squaresToQueen = forward_file_bb(Us, s); - unsafeSquares = passed_pawn_span(Us, s); - - bb = forward_file_bb(Them, s) & pos.pieces(ROOK, QUEEN); - - if (!(pos.pieces(Them) & bb)) - unsafeSquares &= attackedBy[Them][ALL_PIECES] | pos.pieces(Them); - - // If there are no enemy pieces or attacks on passed pawn span, assign a big bonus. - // Or if there is some, but they are all attacked by our pawns, assign a bit smaller bonus. - // Otherwise assign a smaller bonus if the path to queen is not attacked - // and even smaller bonus if it is attacked but block square is not. - int k = !unsafeSquares ? 36 : - !(unsafeSquares & ~attackedBy[Us][PAWN]) ? 30 : - !(unsafeSquares & squaresToQueen) ? 17 : - !(unsafeSquares & blockSq) ? 7 : - 0 ; - - // Assign a larger bonus if the block square is defended - if ((pos.pieces(Us) & bb) || (attackedBy[Us][ALL_PIECES] & blockSq)) - k += 5; - - bonus += make_score(k * w, k * w); - } - } // r > RANK_3 - - score += bonus - PassedFile * edge_distance(file_of(s)); - } - - if constexpr (T) - Trace::add(PASSED, Us, score); - - return score; - } - - - // Evaluation::space() computes a space evaluation for a given side, aiming to improve game - // play in the opening. It is based on the number of safe squares on the four central files - // on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice. - // Finally, the space bonus is multiplied by a weight which decreases according to occupancy. - - template template - Score Evaluation::space() const { - - // Early exit if, for example, both queens or 6 minor pieces have been exchanged - if (pos.non_pawn_material() < SpaceThreshold) - return SCORE_ZERO; - - constexpr Color Them = ~Us; - constexpr Direction Down = -pawn_push(Us); - constexpr Bitboard SpaceMask = - Us == WHITE ? CenterFiles & (Rank2BB | Rank3BB | Rank4BB) - : CenterFiles & (Rank7BB | Rank6BB | Rank5BB); - - // Find the available squares for our pieces inside the area defined by SpaceMask - Bitboard safe = SpaceMask - & ~pos.pieces(Us, PAWN) - & ~attackedBy[Them][PAWN]; - - // Find all squares which are at most three squares behind some friendly pawn - Bitboard behind = pos.pieces(Us, PAWN); - behind |= shift(behind); - behind |= shift(behind); - - // Compute space score based on the number of safe squares and number of our pieces - // increased with number of total blocked pawns in position. - int bonus = popcount(safe) + popcount(behind & safe & ~attackedBy[Them][ALL_PIECES]); - int weight = pos.count(Us) - 3 + std::min(pe->blocked_count(), 9); - Score score = make_score(bonus * weight * weight / 16, 0); - - if constexpr (T) - Trace::add(SPACE, Us, score); - - return score; - } - - - // Evaluation::winnable() adjusts the midgame and endgame score components, based on - // the known attacking/defending status of the players. The final value is derived - // by interpolation from the midgame and endgame values. - - template - Value Evaluation::winnable(Score score) const { - - int outflanking = distance(pos.square(WHITE), pos.square(BLACK)) - + int(rank_of(pos.square(WHITE)) - rank_of(pos.square(BLACK))); - - bool pawnsOnBothFlanks = (pos.pieces(PAWN) & QueenSide) - && (pos.pieces(PAWN) & KingSide); - - bool almostUnwinnable = outflanking < 0 - && !pawnsOnBothFlanks; - - bool infiltration = rank_of(pos.square(WHITE)) > RANK_4 - || rank_of(pos.square(BLACK)) < RANK_5; - - // Compute the initiative bonus for the attacking side - int complexity = 9 * pe->passed_count() - + 12 * pos.count() - + 9 * outflanking - + 21 * pawnsOnBothFlanks - + 24 * infiltration - + 51 * !pos.non_pawn_material() - - 43 * almostUnwinnable - -110 ; - - Value mg = mg_value(score); - Value eg = eg_value(score); - - // Now apply the bonus: note that we find the attacking side by extracting the - // sign of the midgame or endgame values, and that we carefully cap the bonus - // so that the midgame and endgame scores do not change sign after the bonus. - int u = ((mg > 0) - (mg < 0)) * std::clamp(complexity + 50, -abs(mg), 0); - int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg)); - - mg += u; - eg += v; - - // Compute the scale factor for the winning side - Color strongSide = eg > VALUE_DRAW ? WHITE : BLACK; - int sf = me->scale_factor(pos, strongSide); - - // If scale factor is not already specific, scale up/down via general heuristics - if (sf == SCALE_FACTOR_NORMAL) - { - if (pos.opposite_bishops()) - { - // For pure opposite colored bishops endgames use scale factor - // based on the number of passed pawns of the strong side. - if ( pos.non_pawn_material(WHITE) == BishopValueMg - && pos.non_pawn_material(BLACK) == BishopValueMg) - sf = 18 + 4 * popcount(pe->passed_pawns(strongSide)); - // For every other opposite colored bishops endgames use scale factor - // based on the number of all pieces of the strong side. - else - sf = 22 + 3 * pos.count(strongSide); - } - // For rook endgames with strong side not having overwhelming pawn number advantage - // and its pawns being on one flank and weak side protecting its pieces with a king - // use lower scale factor. - else if ( pos.non_pawn_material(WHITE) == RookValueMg - && pos.non_pawn_material(BLACK) == RookValueMg - && pos.count(strongSide) - pos.count(~strongSide) <= 1 - && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN)) - && (attacks_bb(pos.square(~strongSide)) & pos.pieces(~strongSide, PAWN))) - sf = 36; - // For queen vs no queen endgames use scale factor - // based on number of minors of side that doesn't have queen. - else if (pos.count() == 1) - sf = 37 + 3 * (pos.count(WHITE) == 1 ? pos.count(BLACK) + pos.count(BLACK) - : pos.count(WHITE) + pos.count(WHITE)); - // In every other case use scale factor based on - // the number of pawns of the strong side reduced if pawns are on a single flank. - else - sf = std::min(sf, 36 + 7 * pos.count(strongSide)) - 4 * !pawnsOnBothFlanks; - - // Reduce scale factor in case of pawns being on a single flank - sf -= 4 * !pawnsOnBothFlanks; - } - - // Interpolate between the middlegame and (scaled by 'sf') endgame score - v = mg * int(me->game_phase()) - + eg * int(PHASE_MIDGAME - me->game_phase()) * ScaleFactor(sf) / SCALE_FACTOR_NORMAL; - v /= PHASE_MIDGAME; - - if constexpr (T) - { - Trace::add(WINNABLE, make_score(u, eg * ScaleFactor(sf) / SCALE_FACTOR_NORMAL - eg_value(score))); - Trace::add(TOTAL, make_score(mg, eg * ScaleFactor(sf) / SCALE_FACTOR_NORMAL)); - } - - return Value(v); - } - - - // Evaluation::value() is the main function of the class. It computes the various - // parts of the evaluation and returns the value of the position from the point - // of view of the side to move. - - template - Value Evaluation::value() { +// Evaluate is the evaluator for the outer world. It returns a static evaluation +// of the position from the point of view of the side to move. +Value Eval::evaluate(const Eval::NNUE::Networks& networks, + const Position& pos, + Eval::NNUE::AccumulatorCaches& caches, + int optimism) { assert(!pos.checkers()); - // Probe the material hash table - me = Material::probe(pos); + int simpleEval = simple_eval(pos, pos.side_to_move()); + bool smallNet = std::abs(simpleEval) > SmallNetThreshold; + int nnueComplexity; + int v; - // If we have a specialized evaluation function for the current material - // configuration, call it and return. - if (me->specialized_eval_exists()) - return me->evaluate(pos); + Value nnue = smallNet ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity) + : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity); - // Initialize score by reading the incrementally updated scores included in - // the position object (material + piece square tables) and the material - // imbalance. Score is computed internally from the white point of view. - Score score = pos.psq_score() + me->imbalance() + pos.this_thread()->trend; + const auto adjustEval = [&](int nnueDiv, int pawnCountConstant, int pawnCountMul, + int npmConstant, int evalDiv, int shufflingConstant) { + // Blend optimism and eval with nnue complexity and material imbalance + optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 584; + nnue -= nnue * (nnueComplexity * 5 / 3) / nnueDiv; - // Probe the pawn hash table - pe = Pawns::probe(pos); - score += pe->pawn_score(WHITE) - pe->pawn_score(BLACK); + int npm = pos.non_pawn_material() / 64; + v = (nnue * (npm + pawnCountConstant + pawnCountMul * pos.count()) + + optimism * (npmConstant + npm)) + / evalDiv; - // Early exit if score is high - auto lazy_skip = [&](Value lazyThreshold) { - return abs(mg_value(score) + eg_value(score)) > lazyThreshold - + std::abs(pos.this_thread()->bestValue) * 5 / 4 - + pos.non_pawn_material() / 32; + // Damp down the evaluation linearly when shuffling + int shuffling = pos.rule50_count(); + v = v * (shufflingConstant - shuffling) / 207; }; - if (lazy_skip(LazyThreshold1)) - goto make_v; + if (!smallNet) + adjustEval(32395, 942, 11, 139, 1058, 178); + else + adjustEval(32793, 944, 9, 140, 1067, 206); - // Main evaluation begins here - initialize(); - initialize(); - - // Pieces evaluated first (also populates attackedBy, attackedBy2). - // Note that the order of evaluation of the terms is left unspecified. - score += pieces() - pieces() - + pieces() - pieces() - + pieces() - pieces() - + pieces() - pieces(); - - score += mobility[WHITE] - mobility[BLACK]; - - // More complex interactions that require fully populated attack bitboards - score += king< WHITE>() - king< BLACK>() - + passed< WHITE>() - passed< BLACK>(); - - if (lazy_skip(LazyThreshold2)) - goto make_v; - - score += threats() - threats() - + space< WHITE>() - space< BLACK>(); - -make_v: - // Derive single value from mg and eg parts of score - Value v = winnable(score); - - // In case of tracing add all remaining individual evaluation terms - if constexpr (T) - { - Trace::add(MATERIAL, pos.psq_score()); - Trace::add(IMBALANCE, me->imbalance()); - Trace::add(PAWN, pe->pawn_score(WHITE), pe->pawn_score(BLACK)); - Trace::add(MOBILITY, mobility[WHITE], mobility[BLACK]); - } - - // Evaluation grain - v = (v / 16) * 16; - - // Side to move point of view - v = (pos.side_to_move() == WHITE ? v : -v); + // Guarantee evaluation does not hit the tablebase range + v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); return v; - } - - - /// Fisher Random Chess: correction for cornered bishops, to fix chess960 play with NNUE - - Value fix_FRC(const Position& pos) { - - constexpr Bitboard Corners = 1ULL << SQ_A1 | 1ULL << SQ_H1 | 1ULL << SQ_A8 | 1ULL << SQ_H8; - - if (!(pos.pieces(BISHOP) & Corners)) - return VALUE_ZERO; - - int correction = 0; - - if ( pos.piece_on(SQ_A1) == W_BISHOP - && pos.piece_on(SQ_B2) == W_PAWN) - correction -= CorneredBishop; - - if ( pos.piece_on(SQ_H1) == W_BISHOP - && pos.piece_on(SQ_G2) == W_PAWN) - correction -= CorneredBishop; - - if ( pos.piece_on(SQ_A8) == B_BISHOP - && pos.piece_on(SQ_B7) == B_PAWN) - correction += CorneredBishop; - - if ( pos.piece_on(SQ_H8) == B_BISHOP - && pos.piece_on(SQ_G7) == B_PAWN) - correction += CorneredBishop; - - return pos.side_to_move() == WHITE ? Value(3 * correction) - : -Value(3 * correction); - } - -} // namespace Eval - - -/// evaluate() is the evaluator for the outer world. It returns a static -/// evaluation of the position from the point of view of the side to move. - -Value Eval::evaluate(const Position& pos) { - - Value v; - bool useClassical = false; - - // Deciding between classical and NNUE eval (~10 Elo): for high PSQ imbalance we use classical, - // but we switch to NNUE during long shuffling or with high material on the board. - if ( !useNNUE - || abs(eg_value(pos.psq_score())) * 5 > (849 + pos.non_pawn_material() / 64) * (5 + pos.rule50_count())) - { - v = Evaluation(pos).value(); // classical - useClassical = abs(v) >= 298; - } - - // If result of a classical evaluation is much lower than threshold fall back to NNUE - if (useNNUE && !useClassical) - { - Value nnue = NNUE::evaluate(pos, true); // NNUE - int scale = 1136 + 20 * pos.non_pawn_material() / 1024; - Color stm = pos.side_to_move(); - Value optimism = pos.this_thread()->optimism[stm]; - Value psq = (stm == WHITE ? 1 : -1) * eg_value(pos.psq_score()); - int complexity = 35 * abs(nnue - psq) / 256; - - optimism = optimism * (44 + complexity) / 32; - v = (nnue + optimism) * scale / 1024 - optimism; - - if (pos.is_chess960()) - v += fix_FRC(pos); - } - - // Damp down the evaluation linearly when shuffling - v = v * (208 - pos.rule50_count()) / 208; - - // Guarantee evaluation does not hit the tablebase range - v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); - Value zero = Value(0); return zero - v; } -/// trace() is like evaluate(), but instead of returning a value, it returns -/// a string (suitable for outputting to stdout) that contains the detailed -/// descriptions and values of each evaluation term. Useful for debugging. -/// Trace scores are from white's point of view +// Like evaluate(), but instead of returning a value, it returns +// a string (suitable for outputting to stdout) that contains the detailed +// descriptions and values of each evaluation term. Useful for debugging. +// Trace scores are from white's point of view +std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) { -std::string Eval::trace(Position& pos) { + if (pos.checkers()) + return "Final evaluation: none (in check)"; - if (pos.checkers()) - return "Final evaluation: none (in check)"; + auto caches = std::make_unique(networks); - std::stringstream ss; - ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2); + std::stringstream ss; + ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2); + ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n'; - Value v; + ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); - std::memset(scores, 0, sizeof(scores)); + Value v = networks.big.evaluate(pos, &caches->big, false); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n"; - // Reset any global variable used in eval - pos.this_thread()->trend = SCORE_ZERO; - pos.this_thread()->bestValue = VALUE_ZERO; - pos.this_thread()->optimism[WHITE] = VALUE_ZERO; - pos.this_thread()->optimism[BLACK] = VALUE_ZERO; + v = evaluate(networks, pos, *caches, VALUE_ZERO); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)"; + ss << " [with scaled NNUE, ...]"; + ss << "\n"; - v = Evaluation(pos).value(); - - ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) - << " Contributing terms for the classical eval:\n" - << "+------------+-------------+-------------+-------------+\n" - << "| Term | White | Black | Total |\n" - << "| | MG EG | MG EG | MG EG |\n" - << "+------------+-------------+-------------+-------------+\n" - << "| Material | " << Term(MATERIAL) - << "| Imbalance | " << Term(IMBALANCE) - << "| Pawns | " << Term(PAWN) - << "| Knights | " << Term(KNIGHT) - << "| Bishops | " << Term(BISHOP) - << "| Rooks | " << Term(ROOK) - << "| Queens | " << Term(QUEEN) - << "| Mobility | " << Term(MOBILITY) - << "|King safety | " << Term(KING) - << "| Threats | " << Term(THREAT) - << "| Passed | " << Term(PASSED) - << "| Space | " << Term(SPACE) - << "| Winnable | " << Term(WINNABLE) - << "+------------+-------------+-------------+-------------+\n" - << "| Total | " << Term(TOTAL) - << "+------------+-------------+-------------+-------------+\n"; - - if (Eval::useNNUE) - ss << '\n' << NNUE::trace(pos) << '\n'; - - ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); - - v = pos.side_to_move() == WHITE ? v : -v; - ss << "\nClassical evaluation " << to_cp(v) << " (white side)\n"; - if (Eval::useNNUE) - { - v = NNUE::evaluate(pos, false); - v = pos.side_to_move() == WHITE ? v : -v; - ss << "NNUE evaluation " << to_cp(v) << " (white side)\n"; - } - - v = evaluate(pos); - v = pos.side_to_move() == WHITE ? v : -v; - ss << "Final evaluation " << to_cp(v) << " (white side)"; - if (Eval::useNNUE) - ss << " [with scaled NNUE, hybrid, ...]"; - ss << "\n"; - - return ss.str(); + return ss.str(); } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/evaluate.h b/src/evaluate.h index 1934c9bd..2d244ff6 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,6 @@ #define EVALUATE_H_INCLUDED #include -#include #include "types.h" @@ -30,33 +29,29 @@ class Position; namespace Eval { - std::string trace(Position& pos); - Value evaluate(const Position& pos); +constexpr inline int SmallNetThreshold = 1274; - extern bool useNNUE; - extern std::string currentEvalFileName; +// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue +// for the build process (profile-build and fishtest) to work. Do not change the +// name of the macro or the location where this macro is defined, as it is used +// in the Makefile/Fishtest. +#define EvalFileDefaultNameBig "nn-ae6a388e4a1a.nnue" +#define EvalFileDefaultNameSmall "nn-baff1ede1f90.nnue" - // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue - // for the build process (profile-build and fishtest) to work. Do not change the - // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-6877cd24400e.nnue" +namespace NNUE { +struct Networks; +struct AccumulatorCaches; +} - namespace NNUE { +std::string trace(Position& pos, const Eval::NNUE::Networks& networks); - std::string trace(Position& pos); - Value evaluate(const Position& pos, bool adjusted = false); +int simple_eval(const Position& pos, Color c); +Value evaluate(const NNUE::Networks& networks, + const Position& pos, + Eval::NNUE::AccumulatorCaches& caches, + int optimism); +} // namespace Eval - void init(); - void verify(); +} // namespace Stockfish - bool load_eval(std::string name, std::istream& stream); - bool save_eval(std::ostream& stream); - bool save_eval(const std::optional& filename); - - } // namespace NNUE - -} // namespace Eval - -} // namespace Stockfish - -#endif // #ifndef EVALUATE_H_INCLUDED +#endif // #ifndef EVALUATE_H_INCLUDED diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h old mode 100755 new mode 100644 index c19684d7..18718b95 --- a/src/incbin/incbin.h +++ b/src/incbin/incbin.h @@ -3,8 +3,8 @@ * @author Dale Weiler * @brief Utility for including binary files * - * Facilities for including binary files into the current translation unit and - * making use from them externally in other translation units. + * Facilities for including binary files into the current translation unit + * and making use of them externally in other translation units. */ #ifndef INCBIN_HDR #define INCBIN_HDR @@ -139,7 +139,7 @@ #endif #if defined(__APPLE__) -/* The directives are different for Apple branded compilers */ +/* The directives are different for Apple-branded compilers */ # define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" # define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" # define INCBIN_INT ".long " @@ -261,8 +261,8 @@ INCBIN_STRINGIZE( \ INCBIN_STYLE_IDENT(TYPE)) \ -/* Generate the global labels by indirectly invoking the macro with our style - * type and concatenating the name against them. */ +/* Generate the global labels by indirectly invoking the macro + * with our style type and concatenate the name against them. */ #define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ INCBIN_INVOKE( \ INCBIN_GLOBAL, \ diff --git a/src/main.cpp b/src/main.cpp index fad0ef84..a6a3d1c4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,35 +19,26 @@ #include #include "bitboard.h" -#include "endgame.h" +#include "misc.h" #include "position.h" -#include "psqt.h" -#include "search.h" -#include "syzygy/tbprobe.h" -#include "thread.h" -#include "tt.h" +#include "types.h" #include "uci.h" +#include "tune.h" using namespace Stockfish; int main(int argc, char* argv[]) { - std::cout << engine_info() << std::endl; + std::cout << engine_info() << std::endl; - CommandLine::init(argc, argv); - UCI::init(Options); - Tune::init(); - PSQT::init(); - Bitboards::init(); - Position::init(); - Bitbases::init(); - Endgames::init(); - Threads.set(size_t(Options["Threads"])); - Search::clear(); // After threads are up - Eval::NNUE::init(); + Bitboards::init(); + Position::init(); - UCI::loop(argc, argv); + UCIEngine uci(argc, argv); - Threads.set(0); - return 0; + Tune::init(uci.engine_options()); + + uci.loop(); + + return 0; } diff --git a/src/material.cpp b/src/material.cpp deleted file mode 100644 index 1567358a..00000000 --- a/src/material.cpp +++ /dev/null @@ -1,229 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include -#include // For std::memset - -#include "material.h" -#include "thread.h" - -using namespace std; - -namespace Stockfish { - -namespace { - #define S(mg, eg) make_score(mg, eg) - - // Polynomial material imbalance parameters - - // One Score parameter for each pair (our piece, another of our pieces) - constexpr Score QuadraticOurs[][PIECE_TYPE_NB] = { - // OUR PIECE 2 - // bishop pair pawn knight bishop rook queen - {S(1419, 1455) }, // Bishop pair - {S( 101, 28), S( 37, 39) }, // Pawn - {S( 57, 64), S(249, 187), S(-49, -62) }, // Knight OUR PIECE 1 - {S( 0, 0), S(118, 137), S( 10, 27), S( 0, 0) }, // Bishop - {S( -63, -68), S( -5, 3), S(100, 81), S(132, 118), S(-246, -244) }, // Rook - {S(-210, -211), S( 37, 14), S(147, 141), S(161, 105), S(-158, -174), S(-9,-31) } // Queen - }; - - // One Score parameter for each pair (our piece, their piece) - constexpr Score QuadraticTheirs[][PIECE_TYPE_NB] = { - // THEIR PIECE - // bishop pair pawn knight bishop rook queen - { }, // Bishop pair - {S( 33, 30) }, // Pawn - {S( 46, 18), S(106, 84) }, // Knight OUR PIECE - {S( 75, 35), S( 59, 44), S( 60, 15) }, // Bishop - {S( 26, 35), S( 6, 22), S( 38, 39), S(-12, -2) }, // Rook - {S( 97, 93), S(100, 163), S(-58, -91), S(112, 192), S(276, 225) } // Queen - }; - - #undef S - - // Endgame evaluation and scaling functions are accessed directly and not through - // the function maps because they correspond to more than one material hash key. - Endgame EvaluateKXK[] = { Endgame(WHITE), Endgame(BLACK) }; - - Endgame ScaleKBPsK[] = { Endgame(WHITE), Endgame(BLACK) }; - Endgame ScaleKQKRPs[] = { Endgame(WHITE), Endgame(BLACK) }; - Endgame ScaleKPsK[] = { Endgame(WHITE), Endgame(BLACK) }; - Endgame ScaleKPKP[] = { Endgame(WHITE), Endgame(BLACK) }; - - // Helper used to detect a given material distribution - bool is_KXK(const Position& pos, Color us) { - return !more_than_one(pos.pieces(~us)) - && pos.non_pawn_material(us) >= RookValueMg; - } - - bool is_KBPsK(const Position& pos, Color us) { - return pos.non_pawn_material(us) == BishopValueMg - && pos.count(us) >= 1; - } - - bool is_KQKRPs(const Position& pos, Color us) { - return !pos.count(us) - && pos.non_pawn_material(us) == QueenValueMg - && pos.count(~us) == 1 - && pos.count(~us) >= 1; - } - - - /// imbalance() calculates the imbalance by comparing the piece count of each - /// piece type for both colors. - - template - Score imbalance(const int pieceCount[][PIECE_TYPE_NB]) { - - constexpr Color Them = ~Us; - - Score bonus = SCORE_ZERO; - - // Second-degree polynomial material imbalance, by Tord Romstad - for (int pt1 = NO_PIECE_TYPE; pt1 <= QUEEN; ++pt1) - { - if (!pieceCount[Us][pt1]) - continue; - - int v = QuadraticOurs[pt1][pt1] * pieceCount[Us][pt1]; - - for (int pt2 = NO_PIECE_TYPE; pt2 < pt1; ++pt2) - v += QuadraticOurs[pt1][pt2] * pieceCount[Us][pt2] - + QuadraticTheirs[pt1][pt2] * pieceCount[Them][pt2]; - - bonus += pieceCount[Us][pt1] * v; - } - - return bonus; - } - -} // namespace - -namespace Material { - - -/// Material::probe() looks up the current position's material configuration in -/// the material hash table. It returns a pointer to the Entry if the position -/// is found. Otherwise a new Entry is computed and stored there, so we don't -/// have to recompute all when the same material configuration occurs again. - -Entry* probe(const Position& pos) { - - Key key = pos.material_key(); - Entry* e = pos.this_thread()->materialTable[key]; - - if (e->key == key) - return e; - - std::memset(e, 0, sizeof(Entry)); - e->key = key; - e->factor[WHITE] = e->factor[BLACK] = (uint8_t)SCALE_FACTOR_NORMAL; - - Value npm_w = pos.non_pawn_material(WHITE); - Value npm_b = pos.non_pawn_material(BLACK); - Value npm = std::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); - - // Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME] - e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit)); - - // Let's look if we have a specialized evaluation function for this particular - // material configuration. Firstly we look for a fixed configuration one, then - // for a generic one if the previous search failed. - if ((e->evaluationFunction = Endgames::probe(key)) != nullptr) - return e; - - for (Color c : { WHITE, BLACK }) - if (is_KXK(pos, c)) - { - e->evaluationFunction = &EvaluateKXK[c]; - return e; - } - - // OK, we didn't find any special evaluation function for the current material - // configuration. Is there a suitable specialized scaling function? - const auto* sf = Endgames::probe(key); - - if (sf) - { - e->scalingFunction[sf->strongSide] = sf; // Only strong color assigned - return e; - } - - // We didn't find any specialized scaling function, so fall back on generic - // ones that refer to more than one material distribution. Note that in this - // case we don't return after setting the function. - for (Color c : { WHITE, BLACK }) - { - if (is_KBPsK(pos, c)) - e->scalingFunction[c] = &ScaleKBPsK[c]; - - else if (is_KQKRPs(pos, c)) - e->scalingFunction[c] = &ScaleKQKRPs[c]; - } - - if (npm_w + npm_b == VALUE_ZERO && pos.pieces(PAWN)) // Only pawns on the board - { - if (!pos.count(BLACK)) - { - assert(pos.count(WHITE) >= 2); - - e->scalingFunction[WHITE] = &ScaleKPsK[WHITE]; - } - else if (!pos.count(WHITE)) - { - assert(pos.count(BLACK) >= 2); - - e->scalingFunction[BLACK] = &ScaleKPsK[BLACK]; - } - else if (pos.count(WHITE) == 1 && pos.count(BLACK) == 1) - { - // This is a special case because we set scaling functions - // for both colors instead of only one. - e->scalingFunction[WHITE] = &ScaleKPKP[WHITE]; - e->scalingFunction[BLACK] = &ScaleKPKP[BLACK]; - } - } - - // Zero or just one pawn makes it difficult to win, even with a small material - // advantage. This catches some trivial draws like KK, KBK and KNK and gives a - // drawish scale factor for cases such as KRKBP and KmmKm (except for KBBKN). - if (!pos.count(WHITE) && npm_w - npm_b <= BishopValueMg) - e->factor[WHITE] = uint8_t(npm_w < RookValueMg ? SCALE_FACTOR_DRAW : - npm_b <= BishopValueMg ? 4 : 14); - - if (!pos.count(BLACK) && npm_b - npm_w <= BishopValueMg) - e->factor[BLACK] = uint8_t(npm_b < RookValueMg ? SCALE_FACTOR_DRAW : - npm_w <= BishopValueMg ? 4 : 14); - - // Evaluate the material imbalance. We use PIECE_TYPE_NONE as a place holder - // for the bishop pair "extended piece", which allows us to be more flexible - // in defining bishop pair bonuses. - const int pieceCount[COLOR_NB][PIECE_TYPE_NB] = { - { pos.count(WHITE) > 1, pos.count(WHITE), pos.count(WHITE), - pos.count(WHITE) , pos.count(WHITE), pos.count(WHITE) }, - { pos.count(BLACK) > 1, pos.count(BLACK), pos.count(BLACK), - pos.count(BLACK) , pos.count(BLACK), pos.count(BLACK) } }; - - e->score = (imbalance(pieceCount) - imbalance(pieceCount)) / 16; - return e; -} - -} // namespace Material - -} // namespace Stockfish diff --git a/src/material.h b/src/material.h deleted file mode 100644 index 3ca169ce..00000000 --- a/src/material.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef MATERIAL_H_INCLUDED -#define MATERIAL_H_INCLUDED - -#include "endgame.h" -#include "misc.h" -#include "position.h" -#include "types.h" - -namespace Stockfish::Material { - -/// Material::Entry contains various information about a material configuration. -/// It contains a material imbalance evaluation, a function pointer to a special -/// endgame evaluation function (which in most cases is NULL, meaning that the -/// standard evaluation function will be used), and scale factors. -/// -/// The scale factors are used to scale the evaluation score up or down. For -/// instance, in KRB vs KR endgames, the score is scaled down by a factor of 4, -/// which will result in scores of absolute value less than one pawn. - -struct Entry { - - Score imbalance() const { return score; } - Phase game_phase() const { return (Phase)gamePhase; } - bool specialized_eval_exists() const { return evaluationFunction != nullptr; } - Value evaluate(const Position& pos) const { return (*evaluationFunction)(pos); } - - // scale_factor() takes a position and a color as input and returns a scale factor - // for the given color. We have to provide the position in addition to the color - // because the scale factor may also be a function which should be applied to - // the position. For instance, in KBP vs K endgames, the scaling function looks - // for rook pawns and wrong-colored bishops. - ScaleFactor scale_factor(const Position& pos, Color c) const { - ScaleFactor sf = scalingFunction[c] ? (*scalingFunction[c])(pos) - : SCALE_FACTOR_NONE; - return sf != SCALE_FACTOR_NONE ? sf : ScaleFactor(factor[c]); - } - - Key key; - const EndgameBase* evaluationFunction; - const EndgameBase* scalingFunction[COLOR_NB]; // Could be one for each - // side (e.g. KPKP, KBPsK) - Score score; - int16_t gamePhase; - uint8_t factor[COLOR_NB]; -}; - -typedef HashTable Table; - -Entry* probe(const Position& pos); - -} // namespace Stockfish::Material - -#endif // #ifndef MATERIAL_H_INCLUDED diff --git a/src/misc.cpp b/src/misc.cpp index 41c59b3f..1abb81b1 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,313 +16,403 @@ along with this program. If not, see . */ +#include "misc.h" + #ifdef _WIN32 -#if _WIN32_WINNT < 0x0601 -#undef _WIN32_WINNT -#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes -#endif + #if _WIN32_WINNT < 0x0601 + #undef _WIN32_WINNT + #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes + #endif -#ifndef NOMINMAX -#define NOMINMAX -#endif + #ifndef NOMINMAX + #define NOMINMAX + #endif -#include + #include // The needed Windows API for processor groups could be missed from old Windows // versions, so instead of calling them directly (forcing the linker to resolve // the calls at compile time), try to load them at runtime. To do this we need // first to define the corresponding function pointers. extern "C" { -typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP, - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); -typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); -typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); -typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); -typedef WORD(*fun5_t)(); +using fun1_t = bool (*)(LOGICAL_PROCESSOR_RELATIONSHIP, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, + PDWORD); +using fun2_t = bool (*)(USHORT, PGROUP_AFFINITY); +using fun3_t = bool (*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); +using fun4_t = bool (*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); +using fun5_t = WORD (*)(); +using fun6_t = bool (*)(HANDLE, DWORD, PHANDLE); +using fun7_t = bool (*)(LPCSTR, LPCSTR, PLUID); +using fun8_t = bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); } #endif +#include +#include +#include #include #include #include +#include #include -#include -#include +#include + +#include "types.h" #if defined(__linux__) && !defined(__ANDROID__) -#include -#include + #include #endif -#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) || defined(__e2k__) -#define POSIXALIGNEDALLOC -#include +#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \ + || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \ + || defined(__e2k__) + #define POSIXALIGNEDALLOC + #include #endif -#include "misc.h" -#include "thread.h" - -using namespace std; - namespace Stockfish { namespace { -/// Version number. If Version is left empty, then compile date in the format -/// DD-MM-YY and show in engine_info. -const string Version = ""; +// Version number or dev. +constexpr std::string_view version = "dev"; -/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and -/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We -/// can toggle the logging of std::cout and std:cin at runtime whilst preserving -/// usual I/O functionality, all without changing a single line of code! -/// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81 +// Our fancy logging facility. The trick here is to replace cin.rdbuf() and +// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We +// can toggle the logging of std::cout and std:cin at runtime whilst preserving +// usual I/O functionality, all without changing a single line of code! +// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81 -struct Tie: public streambuf { // MSVC requires split streambuf for cin and cout +struct Tie: public std::streambuf { // MSVC requires split streambuf for cin and cout - Tie(streambuf* b, streambuf* l) : buf(b), logBuf(l) {} + Tie(std::streambuf* b, std::streambuf* l) : + buf(b), + logBuf(l) {} - int sync() override { return logBuf->pubsync(), buf->pubsync(); } - int overflow(int c) override { return log(buf->sputc((char)c), "<< "); } - int underflow() override { return buf->sgetc(); } - int uflow() override { return log(buf->sbumpc(), ">> "); } + int sync() override { return logBuf->pubsync(), buf->pubsync(); } + int overflow(int c) override { return log(buf->sputc(char(c)), "<< "); } + int underflow() override { return buf->sgetc(); } + int uflow() override { return log(buf->sbumpc(), ">> "); } - streambuf *buf, *logBuf; + std::streambuf *buf, *logBuf; - int log(int c, const char* prefix) { + int log(int c, const char* prefix) { - static int last = '\n'; // Single log file + static int last = '\n'; // Single log file - if (last == '\n') - logBuf->sputn(prefix, 3); + if (last == '\n') + logBuf->sputn(prefix, 3); - return last = logBuf->sputc((char)c); - } + return last = logBuf->sputc(char(c)); + } }; class Logger { - Logger() : in(cin.rdbuf(), file.rdbuf()), out(cout.rdbuf(), file.rdbuf()) {} - ~Logger() { start(""); } + Logger() : + in(std::cin.rdbuf(), file.rdbuf()), + out(std::cout.rdbuf(), file.rdbuf()) {} + ~Logger() { start(""); } - ofstream file; - Tie in, out; + std::ofstream file; + Tie in, out; -public: - static void start(const std::string& fname) { + public: + static void start(const std::string& fname) { - static Logger l; + static Logger l; - if (l.file.is_open()) - { - cout.rdbuf(l.out.buf); - cin.rdbuf(l.in.buf); - l.file.close(); - } - - if (!fname.empty()) - { - l.file.open(fname, ifstream::out); - - if (!l.file.is_open()) + if (l.file.is_open()) { - cerr << "Unable to open debug log file " << fname << endl; - exit(EXIT_FAILURE); + std::cout.rdbuf(l.out.buf); + std::cin.rdbuf(l.in.buf); + l.file.close(); } - cin.rdbuf(&l.in); - cout.rdbuf(&l.out); + if (!fname.empty()) + { + l.file.open(fname, std::ifstream::out); + + if (!l.file.is_open()) + { + std::cerr << "Unable to open debug log file " << fname << std::endl; + exit(EXIT_FAILURE); + } + + std::cin.rdbuf(&l.in); + std::cout.rdbuf(&l.out); + } } - } }; -} // namespace +} // namespace -/// engine_info() returns the full name of the current Stockfish version. This -/// will be either "Stockfish DD-MM-YY" (where DD-MM-YY is the date when -/// the program was compiled) or "Stockfish ", depending on whether -/// Version is empty. +// Returns the full name of the current Stockfish version. +// For local dev compiles we try to append the commit sha and commit date +// from git if that fails only the local compilation date is set and "nogit" is specified: +// Stockfish dev-YYYYMMDD-SHA +// or +// Stockfish dev-YYYYMMDD-nogit +// +// For releases (non-dev builds) we only include the version number: +// Stockfish version +std::string engine_info(bool to_uci) { + std::stringstream ss; + ss << "Stockfish " << version << std::setfill('0'); -string engine_info(bool to_uci) { + if constexpr (version == "dev") + { + ss << "-"; +#ifdef GIT_DATE + ss << stringify(GIT_DATE); +#else + constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); + std::string month, day, year; + std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" - const string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); - string month, day, year; - stringstream ss, date(__DATE__); // From compiler, format is "Sep 21 2008" + date >> month >> day >> year; + ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4) + << std::setw(2) << std::setfill('0') << day; +#endif - ss << "Stockfish " << Version << setfill('0'); + ss << "-"; - if (Version.empty()) - { - date >> month >> day >> year; - ss << setw(2) << day << setw(2) << (1 + months.find(month) / 4) << year.substr(2); - } +#ifdef GIT_SHA + ss << stringify(GIT_SHA); +#else + ss << "nogit"; +#endif + } - ss << (to_uci ? "\nid author ": " by ") - << "the Stockfish developers (see AUTHORS file)"; + ss << (to_uci ? "\nid author " : " by ") << "the Stockfish developers (see AUTHORS file)"; - return ss.str(); + return ss.str(); } -/// compiler_info() returns a string trying to describe the compiler we use - +// Returns a string trying to describe the compiler we use std::string compiler_info() { - #define stringify2(x) #x - #define stringify(x) stringify2(x) - #define make_version_string(major, minor, patch) stringify(major) "." stringify(minor) "." stringify(patch) +#define make_version_string(major, minor, patch) \ + stringify(major) "." stringify(minor) "." stringify(patch) -/// Predefined macros hell: -/// -/// __GNUC__ Compiler is gcc, Clang or Intel on Linux -/// __INTEL_COMPILER Compiler is Intel -/// _MSC_VER Compiler is MSVC or Intel on Windows -/// _WIN32 Building on Windows (any) -/// _WIN64 Building on Windows 64 bit + // Predefined macros hell: + // + // __GNUC__ Compiler is GCC, Clang or ICX + // __clang__ Compiler is Clang or ICX + // __INTEL_LLVM_COMPILER Compiler is ICX + // _MSC_VER Compiler is MSVC + // _WIN32 Building on Windows (any) + // _WIN64 Building on Windows 64 bit - std::string compiler = "\nCompiled by "; + std::string compiler = "\nCompiled by : "; - #ifdef __clang__ - compiler += "clang++ "; - compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__); - #elif __INTEL_COMPILER - compiler += "Intel compiler "; - compiler += "(version "; - compiler += stringify(__INTEL_COMPILER) " update " stringify(__INTEL_COMPILER_UPDATE); - compiler += ")"; - #elif _MSC_VER - compiler += "MSVC "; - compiler += "(version "; - compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD); - compiler += ")"; - #elif defined(__e2k__) && defined(__LCC__) +#if defined(__INTEL_LLVM_COMPILER) + compiler += "ICX "; + compiler += stringify(__INTEL_LLVM_COMPILER); +#elif defined(__clang__) + compiler += "clang++ "; + compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__); +#elif _MSC_VER + compiler += "MSVC "; + compiler += "(version "; + compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD); + compiler += ")"; +#elif defined(__e2k__) && defined(__LCC__) #define dot_ver2(n) \ - compiler += (char)'.'; \ - compiler += (char)('0' + (n) / 10); \ - compiler += (char)('0' + (n) % 10); + compiler += char('.'); \ + compiler += char('0' + (n) / 10); \ + compiler += char('0' + (n) % 10); - compiler += "MCST LCC "; - compiler += "(version "; - compiler += std::to_string(__LCC__ / 100); - dot_ver2(__LCC__ % 100) - dot_ver2(__LCC_MINOR__) - compiler += ")"; - #elif __GNUC__ - compiler += "g++ (GNUC) "; - compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); - #else - compiler += "Unknown compiler "; - compiler += "(unknown version)"; - #endif + compiler += "MCST LCC "; + compiler += "(version "; + compiler += std::to_string(__LCC__ / 100); + dot_ver2(__LCC__ % 100) dot_ver2(__LCC_MINOR__) compiler += ")"; +#elif __GNUC__ + compiler += "g++ (GNUC) "; + compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); +#else + compiler += "Unknown compiler "; + compiler += "(unknown version)"; +#endif - #if defined(__APPLE__) - compiler += " on Apple"; - #elif defined(__CYGWIN__) - compiler += " on Cygwin"; - #elif defined(__MINGW64__) - compiler += " on MinGW64"; - #elif defined(__MINGW32__) - compiler += " on MinGW32"; - #elif defined(__ANDROID__) - compiler += " on Android"; - #elif defined(__linux__) - compiler += " on Linux"; - #elif defined(_WIN64) - compiler += " on Microsoft Windows 64-bit"; - #elif defined(_WIN32) - compiler += " on Microsoft Windows 32-bit"; - #else - compiler += " on unknown system"; - #endif +#if defined(__APPLE__) + compiler += " on Apple"; +#elif defined(__CYGWIN__) + compiler += " on Cygwin"; +#elif defined(__MINGW64__) + compiler += " on MinGW64"; +#elif defined(__MINGW32__) + compiler += " on MinGW32"; +#elif defined(__ANDROID__) + compiler += " on Android"; +#elif defined(__linux__) + compiler += " on Linux"; +#elif defined(_WIN64) + compiler += " on Microsoft Windows 64-bit"; +#elif defined(_WIN32) + compiler += " on Microsoft Windows 32-bit"; +#else + compiler += " on unknown system"; +#endif - compiler += "\nCompilation settings include: "; - compiler += (Is64Bit ? " 64bit" : " 32bit"); - #if defined(USE_VNNI) + compiler += "\nCompilation architecture : "; +#if defined(ARCH) + compiler += stringify(ARCH); +#else + compiler += "(undefined architecture)"; +#endif + + compiler += "\nCompilation settings : "; + compiler += (Is64Bit ? "64bit" : "32bit"); +#if defined(USE_VNNI) compiler += " VNNI"; - #endif - #if defined(USE_AVX512) +#endif +#if defined(USE_AVX512) compiler += " AVX512"; - #endif - compiler += (HasPext ? " BMI2" : ""); - #if defined(USE_AVX2) +#endif + compiler += (HasPext ? " BMI2" : ""); +#if defined(USE_AVX2) compiler += " AVX2"; - #endif - #if defined(USE_SSE41) +#endif +#if defined(USE_SSE41) compiler += " SSE41"; - #endif - #if defined(USE_SSSE3) +#endif +#if defined(USE_SSSE3) compiler += " SSSE3"; - #endif - #if defined(USE_SSE2) +#endif +#if defined(USE_SSE2) compiler += " SSE2"; - #endif - compiler += (HasPopCnt ? " POPCNT" : ""); - #if defined(USE_MMX) - compiler += " MMX"; - #endif - #if defined(USE_NEON) +#endif + compiler += (HasPopCnt ? " POPCNT" : ""); +#if defined(USE_NEON_DOTPROD) + compiler += " NEON_DOTPROD"; +#elif defined(USE_NEON) compiler += " NEON"; - #endif +#endif - #if !defined(NDEBUG) +#if !defined(NDEBUG) compiler += " DEBUG"; - #endif +#endif - compiler += "\n__VERSION__ macro expands to: "; - #ifdef __VERSION__ - compiler += __VERSION__; - #else - compiler += "(undefined macro)"; - #endif - compiler += "\n"; + compiler += "\nCompiler __VERSION__ macro : "; +#ifdef __VERSION__ + compiler += __VERSION__; +#else + compiler += "(undefined macro)"; +#endif - return compiler; + compiler += "\n"; + + return compiler; } -/// Debug functions used mainly to collect run-time statistics -static std::atomic hits[2], means[2]; +// Debug functions used mainly to collect run-time statistics +constexpr int MaxDebugSlots = 32; -void dbg_hit_on(bool b) { ++hits[0]; if (b) ++hits[1]; } -void dbg_hit_on(bool c, bool b) { if (c) dbg_hit_on(b); } -void dbg_mean_of(int v) { ++means[0]; means[1] += v; } +namespace { + +template +struct DebugInfo { + std::atomic data[N] = {0}; + + constexpr inline std::atomic& operator[](int index) { return data[index]; } +}; + +DebugInfo<2> hit[MaxDebugSlots]; +DebugInfo<2> mean[MaxDebugSlots]; +DebugInfo<3> stdev[MaxDebugSlots]; +DebugInfo<6> correl[MaxDebugSlots]; + +} // namespace + +void dbg_hit_on(bool cond, int slot) { + + ++hit[slot][0]; + if (cond) + ++hit[slot][1]; +} + +void dbg_mean_of(int64_t value, int slot) { + + ++mean[slot][0]; + mean[slot][1] += value; +} + +void dbg_stdev_of(int64_t value, int slot) { + + ++stdev[slot][0]; + stdev[slot][1] += value; + stdev[slot][2] += value * value; +} + +void dbg_correl_of(int64_t value1, int64_t value2, int slot) { + + ++correl[slot][0]; + correl[slot][1] += value1; + correl[slot][2] += value1 * value1; + correl[slot][3] += value2; + correl[slot][4] += value2 * value2; + correl[slot][5] += value1 * value2; +} void dbg_print() { - if (hits[0]) - cerr << "Total " << hits[0] << " Hits " << hits[1] - << " hit rate (%) " << 100 * hits[1] / hits[0] << endl; + int64_t n; + auto E = [&n](int64_t x) { return double(x) / n; }; + auto sqr = [](double x) { return x * x; }; - if (means[0]) - cerr << "Total " << means[0] << " Mean " - << (double)means[1] / means[0] << endl; + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = hit[i][0])) + std::cerr << "Hit #" << i << ": Total " << n << " Hits " << hit[i][1] + << " Hit Rate (%) " << 100.0 * E(hit[i][1]) << std::endl; + + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = mean[i][0])) + { + std::cerr << "Mean #" << i << ": Total " << n << " Mean " << E(mean[i][1]) << std::endl; + } + + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = stdev[i][0])) + { + double r = sqrt(E(stdev[i][2]) - sqr(E(stdev[i][1]))); + std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl; + } + + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = correl[i][0])) + { + double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3])) + / (sqrt(E(correl[i][2]) - sqr(E(correl[i][1]))) + * sqrt(E(correl[i][4]) - sqr(E(correl[i][3])))); + std::cerr << "Correl. #" << i << ": Total " << n << " Coefficient " << r << std::endl; + } } -/// Used to serialize access to std::cout to avoid multiple threads writing at -/// the same time. - +// Used to serialize access to std::cout +// to avoid multiple threads writing at the same time. std::ostream& operator<<(std::ostream& os, SyncCout sc) { - static std::mutex m; + static std::mutex m; - if (sc == IO_LOCK) - m.lock(); + if (sc == IO_LOCK) + m.lock(); - if (sc == IO_UNLOCK) - m.unlock(); + if (sc == IO_UNLOCK) + m.unlock(); - return os; + return os; } -/// Trampoline helper to avoid moving Logger to misc.h +// Trampoline helper to avoid moving Logger to misc.h void start_logger(const std::string& fname) { Logger::start(fname); } -/// prefetch() preloads the given address in L1/L2 cache. This is a non-blocking -/// function that doesn't stall the CPU waiting for data to be loaded from memory, -/// which can be quite slow. #ifdef NO_PREFETCH void prefetch(void*) {} @@ -331,160 +421,173 @@ void prefetch(void*) {} void prefetch(void* addr) { -# if defined(__INTEL_COMPILER) - // This hack prevents prefetches from being optimized away by - // Intel compiler. Both MSVC and gcc seem not be affected by this. - __asm__ (""); -# endif - -# if defined(__INTEL_COMPILER) || defined(_MSC_VER) - _mm_prefetch((char*)addr, _MM_HINT_T0); -# else - __builtin_prefetch(addr); -# endif + #if defined(_MSC_VER) + _mm_prefetch((char*) addr, _MM_HINT_T0); + #else + __builtin_prefetch(addr); + #endif } #endif -/// std_aligned_alloc() is our wrapper for systems where the c++17 implementation -/// does not guarantee the availability of aligned_alloc(). Memory allocated with -/// std_aligned_alloc() must be freed with std_aligned_free(). - +// Wrapper for systems where the c++17 implementation +// does not guarantee the availability of aligned_alloc(). Memory allocated with +// std_aligned_alloc() must be freed with std_aligned_free(). void* std_aligned_alloc(size_t alignment, size_t size) { #if defined(POSIXALIGNEDALLOC) - void *mem; - return posix_memalign(&mem, alignment, size) ? nullptr : mem; + void* mem; + return posix_memalign(&mem, alignment, size) ? nullptr : mem; +#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) + return _mm_malloc(size, alignment); #elif defined(_WIN32) - return _mm_malloc(size, alignment); + return _aligned_malloc(size, alignment); #else - return std::aligned_alloc(alignment, size); + return std::aligned_alloc(alignment, size); #endif } void std_aligned_free(void* ptr) { #if defined(POSIXALIGNEDALLOC) - free(ptr); + free(ptr); +#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) + _mm_free(ptr); #elif defined(_WIN32) - _mm_free(ptr); + _aligned_free(ptr); #else - free(ptr); + free(ptr); #endif } -/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages. +// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages. #if defined(_WIN32) -static void* aligned_large_pages_alloc_windows(size_t allocSize) { +static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) { - #if !defined(_WIN64) - (void)allocSize; // suppress unused-parameter compiler warning + #if !defined(_WIN64) return nullptr; - #else + #else - HANDLE hProcessToken { }; - LUID luid { }; - void* mem = nullptr; + HANDLE hProcessToken{}; + LUID luid{}; + void* mem = nullptr; - const size_t largePageSize = GetLargePageMinimum(); - if (!largePageSize) - return nullptr; + const size_t largePageSize = GetLargePageMinimum(); + if (!largePageSize) + return nullptr; - // We need SeLockMemoryPrivilege, so try to enable it for the process - if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) - return nullptr; + // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges - if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid)) - { - TOKEN_PRIVILEGES tp { }; - TOKEN_PRIVILEGES prevTp { }; - DWORD prevTpLen = 0; + HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll")); - tp.PrivilegeCount = 1; - tp.Privileges[0].Luid = luid; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + if (!hAdvapi32) + hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); - // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, - // we still need to query GetLastError() to ensure that the privileges were actually obtained. - if (AdjustTokenPrivileges( - hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) && - GetLastError() == ERROR_SUCCESS) - { - // Round up size to full pages and allocate - allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); - mem = VirtualAlloc( - NULL, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + auto fun6 = fun6_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken")); + if (!fun6) + return nullptr; + auto fun7 = fun7_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA")); + if (!fun7) + return nullptr; + auto fun8 = fun8_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges")); + if (!fun8) + return nullptr; - // Privilege no longer needed, restore previous state - AdjustTokenPrivileges(hProcessToken, FALSE, &prevTp, 0, NULL, NULL); - } - } + // We need SeLockMemoryPrivilege, so try to enable it for the process + if (!fun6( // OpenProcessToken() + GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) + return nullptr; - CloseHandle(hProcessToken); + if (fun7( // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid) + nullptr, "SeLockMemoryPrivilege", &luid)) + { + TOKEN_PRIVILEGES tp{}; + TOKEN_PRIVILEGES prevTp{}; + DWORD prevTpLen = 0; - return mem; + tp.PrivilegeCount = 1; + tp.Privileges[0].Luid = luid; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - #endif + // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, + // we still need to query GetLastError() to ensure that the privileges were actually obtained. + if (fun8( // AdjustTokenPrivileges() + hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) + && GetLastError() == ERROR_SUCCESS) + { + // Round up size to full pages and allocate + allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); + mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, + PAGE_READWRITE); + + // Privilege no longer needed, restore previous state + fun8( // AdjustTokenPrivileges () + hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); + } + } + + CloseHandle(hProcessToken); + + return mem; + + #endif } void* aligned_large_pages_alloc(size_t allocSize) { - // Try to allocate large pages - void* mem = aligned_large_pages_alloc_windows(allocSize); + // Try to allocate large pages + void* mem = aligned_large_pages_alloc_windows(allocSize); - // Fall back to regular, page aligned, allocation if necessary - if (!mem) - mem = VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + // Fall back to regular, page-aligned, allocation if necessary + if (!mem) + mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); - return mem; + return mem; } #else void* aligned_large_pages_alloc(size_t allocSize) { -#if defined(__linux__) - constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size -#else - constexpr size_t alignment = 4096; // assumed small page size -#endif + #if defined(__linux__) + constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size + #else + constexpr size_t alignment = 4096; // assumed small page size + #endif - // round up to multiples of alignment - size_t size = ((allocSize + alignment - 1) / alignment) * alignment; - void *mem = std_aligned_alloc(alignment, size); -#if defined(MADV_HUGEPAGE) - madvise(mem, size, MADV_HUGEPAGE); -#endif - return mem; + // Round up to multiples of alignment + size_t size = ((allocSize + alignment - 1) / alignment) * alignment; + void* mem = std_aligned_alloc(alignment, size); + #if defined(MADV_HUGEPAGE) + madvise(mem, size, MADV_HUGEPAGE); + #endif + return mem; } #endif -/// aligned_large_pages_free() will free the previously allocated ttmem +// aligned_large_pages_free() will free the previously allocated ttmem #if defined(_WIN32) void aligned_large_pages_free(void* mem) { - if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) - { - DWORD err = GetLastError(); - std::cerr << "Failed to free large page memory. Error code: 0x" - << std::hex << err - << std::dec << std::endl; - exit(EXIT_FAILURE); - } + if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) + { + DWORD err = GetLastError(); + std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err + << std::dec << std::endl; + exit(EXIT_FAILURE); + } } #else -void aligned_large_pages_free(void *mem) { - std_aligned_free(mem); -} +void aligned_large_pages_free(void* mem) { std_aligned_free(mem); } #endif @@ -493,180 +596,177 @@ namespace WinProcGroup { #ifndef _WIN32 -void bindThisThread(size_t) {} +void bind_this_thread(size_t) {} #else -/// best_node() retrieves logical processor information using Windows specific -/// API and returns the best node id for the thread with index idx. Original -/// code from Texel by Peter Österlund. - +namespace { +// Retrieves logical processor information using Windows-specific +// API and returns the best node id for the thread with index idx. Original +// code from Texel by Peter Österlund. int best_node(size_t idx) { - int threads = 0; - int nodes = 0; - int cores = 0; - DWORD returnLength = 0; - DWORD byteOffset = 0; + int threads = 0; + int nodes = 0; + int cores = 0; + DWORD returnLength = 0; + DWORD byteOffset = 0; - // Early exit if the needed API is not available at runtime - HMODULE k32 = GetModuleHandle("Kernel32.dll"); - auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx"); - if (!fun1) - return -1; + // Early exit if the needed API is not available at runtime + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto fun1 = (fun1_t) (void (*)()) GetProcAddress(k32, "GetLogicalProcessorInformationEx"); + if (!fun1) + return -1; - // First call to GetLogicalProcessorInformationEx() to get returnLength. - // We expect the call to fail due to null buffer. - if (fun1(RelationAll, nullptr, &returnLength)) - return -1; + // First call to GetLogicalProcessorInformationEx() to get returnLength. + // We expect the call to fail due to null buffer. + if (fun1(RelationAll, nullptr, &returnLength)) + return -1; - // Once we know returnLength, allocate the buffer - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; - ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength); + // Once we know returnLength, allocate the buffer + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; + ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) malloc(returnLength); - // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed - if (!fun1(RelationAll, buffer, &returnLength)) - { - free(buffer); - return -1; - } + // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed + if (!fun1(RelationAll, buffer, &returnLength)) + { + free(buffer); + return -1; + } - while (byteOffset < returnLength) - { - if (ptr->Relationship == RelationNumaNode) - nodes++; + while (byteOffset < returnLength) + { + if (ptr->Relationship == RelationNumaNode) + nodes++; - else if (ptr->Relationship == RelationProcessorCore) - { - cores++; - threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1; - } + else if (ptr->Relationship == RelationProcessorCore) + { + cores++; + threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1; + } - assert(ptr->Size); - byteOffset += ptr->Size; - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); - } + assert(ptr->Size); + byteOffset += ptr->Size; + ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) (((char*) ptr) + ptr->Size); + } - free(buffer); + free(buffer); - std::vector groups; + std::vector groups; - // Run as many threads as possible on the same node until core limit is - // reached, then move on filling the next node. - for (int n = 0; n < nodes; n++) - for (int i = 0; i < cores / nodes; i++) - groups.push_back(n); + // Run as many threads as possible on the same node until the core limit is + // reached, then move on to filling the next node. + for (int n = 0; n < nodes; n++) + for (int i = 0; i < cores / nodes; i++) + groups.push_back(n); - // In case a core has more than one logical processor (we assume 2) and we - // have still threads to allocate, then spread them evenly across available - // nodes. - for (int t = 0; t < threads - cores; t++) - groups.push_back(t % nodes); + // In case a core has more than one logical processor (we assume 2) and we + // still have threads to allocate, spread them evenly across available nodes. + for (int t = 0; t < threads - cores; t++) + groups.push_back(t % nodes); - // If we still have more threads than the total number of logical processors - // then return -1 and let the OS to decide what to do. - return idx < groups.size() ? groups[idx] : -1; + // If we still have more threads than the total number of logical processors + // then return -1 and let the OS to decide what to do. + return idx < groups.size() ? groups[idx] : -1; +} } -/// bindThisThread() set the group affinity of the current thread +// Sets the group affinity of the current thread +void bind_this_thread(size_t idx) { -void bindThisThread(size_t idx) { + // Use only local variables to be thread-safe + int node = best_node(idx); - // Use only local variables to be thread-safe - int node = best_node(idx); + if (node == -1) + return; - if (node == -1) - return; + // Early exit if the needed API are not available at runtime + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto fun2 = fun2_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMaskEx")); + auto fun3 = fun3_t((void (*)()) GetProcAddress(k32, "SetThreadGroupAffinity")); + auto fun4 = fun4_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMask2")); + auto fun5 = fun5_t((void (*)()) GetProcAddress(k32, "GetMaximumProcessorGroupCount")); - // Early exit if the needed API are not available at runtime - HMODULE k32 = GetModuleHandle("Kernel32.dll"); - auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); - auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity"); - auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2"); - auto fun5 = (fun5_t)(void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount"); + if (!fun2 || !fun3) + return; - if (!fun2 || !fun3) - return; - - if (!fun4 || !fun5) - { - GROUP_AFFINITY affinity; - if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx - fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity - } - else - { - // If a numa node has more than one processor group, we assume they are - // sized equal and we spread threads evenly across the groups. - USHORT elements, returnedElements; - elements = fun5(); // GetMaximumProcessorGroupCount - GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY)); - if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2 - fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr); // SetThreadGroupAffinity - free(affinity); - } + if (!fun4 || !fun5) + { + GROUP_AFFINITY affinity; + if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx + fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity + } + else + { + // If a numa node has more than one processor group, we assume they are + // sized equal and we spread threads evenly across the groups. + USHORT elements, returnedElements; + elements = fun5(); // GetMaximumProcessorGroupCount + GROUP_AFFINITY* affinity = (GROUP_AFFINITY*) malloc(elements * sizeof(GROUP_AFFINITY)); + if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2 + fun3(GetCurrentThread(), &affinity[idx % returnedElements], + nullptr); // SetThreadGroupAffinity + free(affinity); + } } #endif -} // namespace WinProcGroup +} // namespace WinProcGroup #ifdef _WIN32 -#include -#define GETCWD _getcwd + #include + #define GETCWD _getcwd #else -#include -#define GETCWD getcwd + #include + #define GETCWD getcwd #endif -namespace CommandLine { -string argv0; // path+name of the executable binary, as given by argv[0] -string binaryDirectory; // path of the executable directory -string workingDirectory; // path of the working directory - -void init(int argc, char* argv[]) { - (void)argc; - string pathSeparator; - - // extract the path+name of the executable binary - argv0 = argv[0]; +std::string CommandLine::get_binary_directory(std::string argv0) { + std::string pathSeparator; #ifdef _WIN32 pathSeparator = "\\"; - #ifdef _MSC_VER + #ifdef _MSC_VER // Under windows argv[0] may not have the extension. Also _get_pgmptr() had - // issues in some windows 10 versions, so check returned values carefully. + // issues in some Windows 10 versions, so check returned values carefully. char* pgmptr = nullptr; if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) argv0 = pgmptr; - #endif + #endif #else pathSeparator = "/"; #endif - // extract the working directory - workingDirectory = ""; - char buff[40000]; - char* cwd = GETCWD(buff, 40000); - if (cwd) - workingDirectory = cwd; + // Extract the working directory + auto workingDirectory = CommandLine::get_working_directory(); - // extract the binary directory path from argv0 - binaryDirectory = argv0; - size_t pos = binaryDirectory.find_last_of("\\/"); + // Extract the binary directory path from argv0 + auto binaryDirectory = argv0; + size_t pos = binaryDirectory.find_last_of("\\/"); if (pos == std::string::npos) binaryDirectory = "." + pathSeparator; else binaryDirectory.resize(pos + 1); - // pattern replacement: "./" at the start of path is replaced by the working directory + // Pattern replacement: "./" at the start of path is replaced by the working directory if (binaryDirectory.find("." + pathSeparator) == 0) binaryDirectory.replace(0, 1, workingDirectory); + + return binaryDirectory; +} + +std::string CommandLine::get_working_directory() { + std::string workingDirectory = ""; + char buff[40000]; + char* cwd = GETCWD(buff, 40000); + if (cwd) + workingDirectory = cwd; + + return workingDirectory; } -} // namespace CommandLine - -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/misc.h b/src/misc.h index b9626733..d75b236f 100644 --- a/src/misc.h +++ b/src/misc.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,222 +19,217 @@ #ifndef MISC_H_INCLUDED #define MISC_H_INCLUDED +#include #include #include -#include +#include +#include +#include +#include #include #include -#include -#include "types.h" +#define stringify2(x) #x +#define stringify(x) stringify2(x) namespace Stockfish { std::string engine_info(bool to_uci = false); std::string compiler_info(); + +// Preloads the given address in L1/L2 cache. This is a non-blocking +// function that doesn't stall the CPU waiting for data to be loaded from memory, +// which can be quite slow. void prefetch(void* addr); -void start_logger(const std::string& fname); + +void start_logger(const std::string& fname); void* std_aligned_alloc(size_t alignment, size_t size); -void std_aligned_free(void* ptr); -void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes -void aligned_large_pages_free(void* mem); // nop if mem == nullptr +void std_aligned_free(void* ptr); +// memory aligned by page size, min alignment: 4096 bytes +void* aligned_large_pages_alloc(size_t size); +// nop if mem == nullptr +void aligned_large_pages_free(void* mem); -void dbg_hit_on(bool b); -void dbg_hit_on(bool c, bool b); -void dbg_mean_of(int v); -void dbg_print(); - -typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds -static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); -inline TimePoint now() { - return std::chrono::duration_cast - (std::chrono::steady_clock::now().time_since_epoch()).count(); -} - -template -struct HashTable { - Entry* operator[](Key key) { return &table[(uint32_t)key & (Size - 1)]; } - -private: - std::vector table = std::vector(Size); // Allocate on the heap +// Deleter for automating release of memory area +template +struct AlignedDeleter { + void operator()(T* ptr) const { + ptr->~T(); + std_aligned_free(ptr); + } }; +template +struct LargePageDeleter { + void operator()(T* ptr) const { + ptr->~T(); + aligned_large_pages_free(ptr); + } +}; -enum SyncCout { IO_LOCK, IO_UNLOCK }; +template +using AlignedPtr = std::unique_ptr>; + +template +using LargePagePtr = std::unique_ptr>; + + +void dbg_hit_on(bool cond, int slot = 0); +void dbg_mean_of(int64_t value, int slot = 0); +void dbg_stdev_of(int64_t value, int slot = 0); +void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0); +void dbg_print(); + +using TimePoint = std::chrono::milliseconds::rep; // A value in milliseconds +static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); +inline TimePoint now() { + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()) + .count(); +} + + +enum SyncCout { + IO_LOCK, + IO_UNLOCK +}; std::ostream& operator<<(std::ostream&, SyncCout); #define sync_cout std::cout << IO_LOCK #define sync_endl std::endl << IO_UNLOCK -// align_ptr_up() : get the first aligned element of an array. +// Get the first aligned element of an array. // ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes, // where N is the number of elements in the array. -template -T* align_ptr_up(T* ptr) -{ - static_assert(alignof(T) < Alignment); +template +T* align_ptr_up(T* ptr) { + static_assert(alignof(T) < Alignment); - const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr)); - return reinterpret_cast(reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); + const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr)); + return reinterpret_cast( + reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); } -// IsLittleEndian : true if and only if the binary is compiled on a little endian machine -static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 }; +// True if and only if the binary is compiled on a little-endian machine +static inline const union { + uint32_t i; + char c[4]; +} Le = {0x01020304}; static inline const bool IsLittleEndian = (Le.c[0] == 4); -// RunningAverage : a class to calculate a running average of a series of values. -// For efficiency, all computations are done with integers. -class RunningAverage { - public: - - // Constructor - RunningAverage() {} - - // Reset the running average to rational value p / q - void set(int64_t p, int64_t q) - { average = p * PERIOD * RESOLUTION / q; } - - // Update average with value v - void update(int64_t v) - { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; } - - // Test if average is strictly greater than rational a / b - bool is_greater(int64_t a, int64_t b) - { return b * average > a * PERIOD * RESOLUTION ; } - - int64_t value() - { return average / (PERIOD * RESOLUTION); } - - private : - static constexpr int64_t PERIOD = 4096; - static constexpr int64_t RESOLUTION = 1024; - int64_t average; -}; - -template +template class ValueList { -public: - std::size_t size() const { return size_; } - void resize(std::size_t newSize) { size_ = newSize; } - void push_back(const T& value) { values_[size_++] = value; } - T& operator[](std::size_t index) { return values_[index]; } - T* begin() { return values_; } - T* end() { return values_ + size_; } - const T& operator[](std::size_t index) const { return values_[index]; } - const T* begin() const { return values_; } - const T* end() const { return values_ + size_; } + public: + std::size_t size() const { return size_; } + void push_back(const T& value) { values_[size_++] = value; } + const T* begin() const { return values_; } + const T* end() const { return values_ + size_; } + const T& operator[](int index) const { return values_[index]; } - void swap(ValueList& other) { - const std::size_t maxSize = std::max(size_, other.size_); - for (std::size_t i = 0; i < maxSize; ++i) { - std::swap(values_[i], other.values_[i]); - } - std::swap(size_, other.size_); - } - -private: - T values_[MaxSize]; - std::size_t size_ = 0; + private: + T values_[MaxSize]; + std::size_t size_ = 0; }; -/// sigmoid(t, x0, y0, C, P, Q) implements a sigmoid-like function using only integers, -/// with the following properties: -/// -/// - sigmoid is centered in (x0, y0) -/// - sigmoid has amplitude [-P/Q , P/Q] instead of [-1 , +1] -/// - limit is (y0 - P/Q) when t tends to -infinity -/// - limit is (y0 + P/Q) when t tends to +infinity -/// - the slope can be adjusted using C > 0, smaller C giving a steeper sigmoid -/// - the slope of the sigmoid when t = x0 is P/(Q*C) -/// - sigmoid is increasing with t when P > 0 and Q > 0 -/// - to get a decreasing sigmoid, call with -t, or change sign of P -/// - mean value of the sigmoid is y0 -/// -/// Use to draw the sigmoid - -inline int64_t sigmoid(int64_t t, int64_t x0, - int64_t y0, - int64_t C, - int64_t P, - int64_t Q) -{ - assert(C > 0); - return y0 + P * (t-x0) / (Q * (std::abs(t-x0) + C)) ; -} - - -/// xorshift64star Pseudo-Random Number Generator -/// This class is based on original code written and dedicated -/// to the public domain by Sebastiano Vigna (2014). -/// It has the following characteristics: -/// -/// - Outputs 64-bit numbers -/// - Passes Dieharder and SmallCrush test batteries -/// - Does not require warm-up, no zeroland to escape -/// - Internal state is a single 64-bit integer -/// - Period is 2^64 - 1 -/// - Speed: 1.60 ns/call (Core i7 @3.40GHz) -/// -/// For further analysis see -/// +// xorshift64star Pseudo-Random Number Generator +// This class is based on original code written and dedicated +// to the public domain by Sebastiano Vigna (2014). +// It has the following characteristics: +// +// - Outputs 64-bit numbers +// - Passes Dieharder and SmallCrush test batteries +// - Does not require warm-up, no zeroland to escape +// - Internal state is a single 64-bit integer +// - Period is 2^64 - 1 +// - Speed: 1.60 ns/call (Core i7 @3.40GHz) +// +// For further analysis see +// class PRNG { - uint64_t s; + uint64_t s; - uint64_t rand64() { + uint64_t rand64() { - s ^= s >> 12, s ^= s << 25, s ^= s >> 27; - return s * 2685821657736338717LL; - } + s ^= s >> 12, s ^= s << 25, s ^= s >> 27; + return s * 2685821657736338717LL; + } -public: - PRNG(uint64_t seed) : s(seed) { assert(seed); } + public: + PRNG(uint64_t seed) : + s(seed) { + assert(seed); + } - template T rand() { return T(rand64()); } + template + T rand() { + return T(rand64()); + } - /// Special generator used to fast init magic numbers. - /// Output values only have 1/8th of their bits set on average. - template T sparse_rand() - { return T(rand64() & rand64() & rand64()); } + // Special generator used to fast init magic numbers. + // Output values only have 1/8th of their bits set on average. + template + T sparse_rand() { + return T(rand64() & rand64() & rand64()); + } }; inline uint64_t mul_hi64(uint64_t a, uint64_t b) { #if defined(__GNUC__) && defined(IS_64BIT) - __extension__ typedef unsigned __int128 uint128; - return ((uint128)a * (uint128)b) >> 64; + __extension__ using uint128 = unsigned __int128; + return (uint128(a) * uint128(b)) >> 64; #else - uint64_t aL = (uint32_t)a, aH = a >> 32; - uint64_t bL = (uint32_t)b, bH = b >> 32; + uint64_t aL = uint32_t(a), aH = a >> 32; + uint64_t bL = uint32_t(b), bH = b >> 32; uint64_t c1 = (aL * bL) >> 32; uint64_t c2 = aH * bL + c1; - uint64_t c3 = aL * bH + (uint32_t)c2; + uint64_t c3 = aL * bH + uint32_t(c2); return aH * bH + (c2 >> 32) + (c3 >> 32); #endif } -/// Under Windows it is not possible for a process to run on more than one -/// logical processor group. This usually means to be limited to use max 64 -/// cores. To overcome this, some special platform specific API should be -/// called to set group affinity for each thread. Original code from Texel by -/// Peter Österlund. - +// Under Windows it is not possible for a process to run on more than one +// logical processor group. This usually means being limited to using max 64 +// cores. To overcome this, some special platform-specific API should be +// called to set group affinity for each thread. Original code from Texel by +// Peter Österlund. namespace WinProcGroup { - void bindThisThread(size_t idx); +void bind_this_thread(size_t idx); } -namespace CommandLine { - void init(int argc, char* argv[]); - extern std::string binaryDirectory; // path of the executable directory - extern std::string workingDirectory; // path of the working directory +struct CommandLine { + public: + CommandLine(int _argc, char** _argv) : + argc(_argc), + argv(_argv) {} + + static std::string get_binary_directory(std::string argv0); + static std::string get_working_directory(); + + int argc; + char** argv; +}; + +namespace Utility { + +template +void move_to_front(std::vector& vec, Predicate pred) { + auto it = std::find_if(vec.begin(), vec.end(), pred); + + if (it != vec.end()) + { + std::rotate(vec.begin(), it, it + 1); + } +} } -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef MISC_H_INCLUDED +#endif // #ifndef MISC_H_INCLUDED diff --git a/src/movegen.cpp b/src/movegen.cpp index c7a3c29b..e6923067 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,82 +16,86 @@ along with this program. If not, see . */ -#include - #include "movegen.h" + +#include +#include + +#include "bitboard.h" #include "position.h" namespace Stockfish { namespace { - template - ExtMove* make_promotions(ExtMove* moveList, Square to) { +template +ExtMove* make_promotions(ExtMove* moveList, [[maybe_unused]] Square to) { - if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) - *moveList++ = make(to - D, to, QUEEN); + constexpr bool all = Type == EVASIONS || Type == NON_EVASIONS; - if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS) + if constexpr (Type == CAPTURES || all) + *moveList++ = Move::make(to - D, to, QUEEN); + + if constexpr ((Type == CAPTURES && Enemy) || (Type == QUIETS && !Enemy) || all) { - *moveList++ = make(to - D, to, ROOK); - *moveList++ = make(to - D, to, BISHOP); - *moveList++ = make(to - D, to, KNIGHT); + *moveList++ = Move::make(to - D, to, ROOK); + *moveList++ = Move::make(to - D, to, BISHOP); + *moveList++ = Move::make(to - D, to, KNIGHT); } return moveList; - } +} - template - ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard target) { +template +ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard target) { constexpr Color Them = ~Us; - constexpr Bitboard TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB); - constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB); + constexpr Bitboard TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB); + constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB); constexpr Direction Up = pawn_push(Us); constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST); constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST); const Bitboard emptySquares = ~pos.pieces(); - const Bitboard enemies = Type == EVASIONS ? pos.checkers() - : pos.pieces(Them); + const Bitboard enemies = Type == EVASIONS ? pos.checkers() : pos.pieces(Them); - Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB; + Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB; Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB; // Single and double pawn pushes, no promotions - if (Type != CAPTURES) + if constexpr (Type != CAPTURES) { - Bitboard b1 = shift(pawnsNotOn7) & emptySquares; + Bitboard b1 = shift(pawnsNotOn7) & emptySquares; Bitboard b2 = shift(b1 & TRank3BB) & emptySquares; - if (Type == EVASIONS) // Consider only blocking squares + if constexpr (Type == EVASIONS) // Consider only blocking squares { b1 &= target; b2 &= target; } - if (Type == QUIET_CHECKS) + if constexpr (Type == QUIET_CHECKS) { // To make a quiet check, you either make a direct check by pushing a pawn // or push a blocker pawn that is not on the same file as the enemy king. // Discovered check promotion has been already generated amongst the captures. - Square ksq = pos.square(Them); + Square ksq = pos.square(Them); Bitboard dcCandidatePawns = pos.blockers_for_king(Them) & ~file_bb(ksq); - b1 &= pawn_attacks_bb(Them, ksq) | shift< Up>(dcCandidatePawns); - b2 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); + b1 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); + b2 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); } while (b1) { - Square to = pop_lsb(b1); - *moveList++ = make_move(to - Up, to); + Square to = pop_lsb(b1); + *moveList++ = Move(to - Up, to); } while (b2) { - Square to = pop_lsb(b2); - *moveList++ = make_move(to - Up - Up, to); + Square to = pop_lsb(b2); + *moveList++ = Move(to - Up - Up, to); } } @@ -99,38 +103,38 @@ namespace { if (pawnsOn7) { Bitboard b1 = shift(pawnsOn7) & enemies; - Bitboard b2 = shift(pawnsOn7) & enemies; - Bitboard b3 = shift(pawnsOn7) & emptySquares; + Bitboard b2 = shift(pawnsOn7) & enemies; + Bitboard b3 = shift(pawnsOn7) & emptySquares; - if (Type == EVASIONS) + if constexpr (Type == EVASIONS) b3 &= target; while (b1) - moveList = make_promotions(moveList, pop_lsb(b1)); + moveList = make_promotions(moveList, pop_lsb(b1)); while (b2) - moveList = make_promotions(moveList, pop_lsb(b2)); + moveList = make_promotions(moveList, pop_lsb(b2)); while (b3) - moveList = make_promotions(moveList, pop_lsb(b3)); + moveList = make_promotions(moveList, pop_lsb(b3)); } // Standard and en passant captures - if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) { Bitboard b1 = shift(pawnsNotOn7) & enemies; - Bitboard b2 = shift(pawnsNotOn7) & enemies; + Bitboard b2 = shift(pawnsNotOn7) & enemies; while (b1) { - Square to = pop_lsb(b1); - *moveList++ = make_move(to - UpRight, to); + Square to = pop_lsb(b1); + *moveList++ = Move(to - UpRight, to); } while (b2) { - Square to = pop_lsb(b2); - *moveList++ = make_move(to - UpLeft, to); + Square to = pop_lsb(b2); + *moveList++ = Move(to - UpLeft, to); } if (pos.ep_square() != SQ_NONE) @@ -146,16 +150,16 @@ namespace { assert(b1); while (b1) - *moveList++ = make(pop_lsb(b1), pos.ep_square()); + *moveList++ = Move::make(pop_lsb(b1), pos.ep_square()); } } return moveList; - } +} - template - ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) { +template +ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) { static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()"); @@ -163,43 +167,43 @@ namespace { while (bb) { - Square from = pop_lsb(bb); - Bitboard b = attacks_bb(from, pos.pieces()) & target; + Square from = pop_lsb(bb); + Bitboard b = attacks_bb(from, pos.pieces()) & target; // To check, you either move freely a blocker or make a direct check. if (Checks && (Pt == QUEEN || !(pos.blockers_for_king(~Us) & from))) b &= pos.check_squares(Pt); while (b) - *moveList++ = make_move(from, pop_lsb(b)); + *moveList++ = Move(from, pop_lsb(b)); } return moveList; - } +} - template - ExtMove* generate_all(const Position& pos, ExtMove* moveList) { +template +ExtMove* generate_all(const Position& pos, ExtMove* moveList) { static_assert(Type != LEGAL, "Unsupported type in generate_all()"); - constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations - const Square ksq = pos.square(Us); - Bitboard target; + constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations + const Square ksq = pos.square(Us); + Bitboard target; // Skip generating non-king moves when in double check if (Type != EVASIONS || !more_than_one(pos.checkers())) { - target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) - : Type == NON_EVASIONS ? ~pos.pieces( Us) - : Type == CAPTURES ? pos.pieces(~Us) - : ~pos.pieces( ); // QUIETS || QUIET_CHECKS + target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) + : Type == NON_EVASIONS ? ~pos.pieces(Us) + : Type == CAPTURES ? pos.pieces(~Us) + : ~pos.pieces(); // QUIETS || QUIET_CHECKS moveList = generate_pawn_moves(pos, moveList, target); moveList = generate_moves(pos, moveList, target); moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); } if (!Checks || pos.blockers_for_king(~Us) & ksq) @@ -209,38 +213,38 @@ namespace { b &= ~attacks_bb(pos.square(~Us)); while (b) - *moveList++ = make_move(ksq, pop_lsb(b)); + *moveList++ = Move(ksq, pop_lsb(b)); if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING)) - for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) + for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE}) if (!pos.castling_impeded(cr) && pos.can_castle(cr)) - *moveList++ = make(ksq, pos.castling_rook_square(cr)); + *moveList++ = Move::make(ksq, pos.castling_rook_square(cr)); } return moveList; - } +} -} // namespace +} // namespace -/// Generates all pseudo-legal captures plus queen promotions -/// Generates all pseudo-legal non-captures and underpromotions -/// Generates all pseudo-legal check evasions when the side to move is in check -/// Generates all pseudo-legal non-captures giving check, except castling and promotions -/// Generates all pseudo-legal captures and non-captures -/// -/// Returns a pointer to the end of the move list. - +// Generates all pseudo-legal captures plus queen promotions +// Generates all pseudo-legal non-captures and underpromotions +// Generates all pseudo-legal check evasions +// Generates all pseudo-legal captures and non-captures +// Generates all pseudo-legal non-captures giving check, +// except castling and promotions +// +// Returns a pointer to the end of the move list. template ExtMove* generate(const Position& pos, ExtMove* moveList) { - static_assert(Type != LEGAL, "Unsupported type in generate()"); - assert((Type == EVASIONS) == (bool)pos.checkers()); + static_assert(Type != LEGAL, "Unsupported type in generate()"); + assert((Type == EVASIONS) == bool(pos.checkers())); - Color us = pos.side_to_move(); + Color us = pos.side_to_move(); - return us == WHITE ? generate_all(pos, moveList) - : generate_all(pos, moveList); + return us == WHITE ? generate_all(pos, moveList) + : generate_all(pos, moveList); } // Explicit template instantiations @@ -251,26 +255,26 @@ template ExtMove* generate(const Position&, ExtMove*); template ExtMove* generate(const Position&, ExtMove*); -/// generate generates all the legal moves in the given position +// generate generates all the legal moves in the given position template<> ExtMove* generate(const Position& pos, ExtMove* moveList) { - Color us = pos.side_to_move(); - Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us); - Square ksq = pos.square(us); - ExtMove* cur = moveList; + Color us = pos.side_to_move(); + Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us); + Square ksq = pos.square(us); + ExtMove* cur = moveList; - moveList = pos.checkers() ? generate(pos, moveList) - : generate(pos, moveList); - while (cur != moveList) - if ( ((pinned && pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT) - && !pos.legal(*cur)) - *cur = (--moveList)->move; - else - ++cur; + moveList = + pos.checkers() ? generate(pos, moveList) : generate(pos, moveList); + while (cur != moveList) + if (((pinned & cur->from_sq()) || cur->from_sq() == ksq || cur->type_of() == EN_PASSANT) + && !pos.legal(*cur)) + *cur = *(--moveList); + else + ++cur; - return moveList; + return moveList; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/movegen.h b/src/movegen.h index bbb35b39..5f650d2e 100644 --- a/src/movegen.h +++ b/src/movegen.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,8 @@ #ifndef MOVEGEN_H_INCLUDED #define MOVEGEN_H_INCLUDED -#include +#include // IWYU pragma: keep +#include #include "types.h" @@ -28,50 +29,46 @@ namespace Stockfish { class Position; enum GenType { - CAPTURES, - QUIETS, - QUIET_CHECKS, - EVASIONS, - NON_EVASIONS, - LEGAL + CAPTURES, + QUIETS, + QUIET_CHECKS, + EVASIONS, + NON_EVASIONS, + LEGAL }; -struct ExtMove { - Move move; - int value; +struct ExtMove: public Move { + int value; - operator Move() const { return move; } - void operator=(Move m) { move = m; } + void operator=(Move m) { data = m.raw(); } - // Inhibit unwanted implicit conversions to Move - // with an ambiguity that yields to a compile error. - operator float() const = delete; + // Inhibit unwanted implicit conversions to Move + // with an ambiguity that yields to a compile error. + operator float() const = delete; }; -inline bool operator<(const ExtMove& f, const ExtMove& s) { - return f.value < s.value; -} +inline bool operator<(const ExtMove& f, const ExtMove& s) { return f.value < s.value; } template ExtMove* generate(const Position& pos, ExtMove* moveList); -/// The MoveList struct is a simple wrapper around generate(). It sometimes comes -/// in handy to use this class instead of the low level generate() function. +// The MoveList struct wraps the generate() function and returns a convenient +// list of moves. Using MoveList is sometimes preferable to directly calling +// the lower level generate() function. template struct MoveList { - explicit MoveList(const Position& pos) : last(generate(pos, moveList)) {} - const ExtMove* begin() const { return moveList; } - const ExtMove* end() const { return last; } - size_t size() const { return last - moveList; } - bool contains(Move move) const { - return std::find(begin(), end(), move) != end(); - } + explicit MoveList(const Position& pos) : + last(generate(pos, moveList)) {} + const ExtMove* begin() const { return moveList; } + const ExtMove* end() const { return last; } + size_t size() const { return last - moveList; } + bool contains(Move move) const { return std::find(begin(), end(), move) != end(); } -private: - ExtMove moveList[MAX_MOVES], *last; + private: + ExtMove moveList[MAX_MOVES], *last; }; -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef MOVEGEN_H_INCLUDED +#endif // #ifndef MOVEGEN_H_INCLUDED diff --git a/src/movepick.cpp b/src/movepick.cpp index 694b9222..4a93662d 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,258 +16,371 @@ along with this program. If not, see . */ -#include - #include "movepick.h" +#include +#include +#include +#include + +#include "bitboard.h" +#include "position.h" + namespace Stockfish { namespace { - enum Stages { - MAIN_TT, CAPTURE_INIT, GOOD_CAPTURE, REFUTATION, QUIET_INIT, QUIET, BAD_CAPTURE, - EVASION_TT, EVASION_INIT, EVASION, - PROBCUT_TT, PROBCUT_INIT, PROBCUT, - QSEARCH_TT, QCAPTURE_INIT, QCAPTURE, QCHECK_INIT, QCHECK - }; +enum Stages { + // generate main search moves + MAIN_TT, + CAPTURE_INIT, + GOOD_CAPTURE, + REFUTATION, + QUIET_INIT, + GOOD_QUIET, + BAD_CAPTURE, + BAD_QUIET, - // partial_insertion_sort() sorts moves in descending order up to and including - // a given limit. The order of moves smaller than the limit is left unspecified. - void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { + // generate evasion moves + EVASION_TT, + EVASION_INIT, + EVASION, + + // generate probcut moves + PROBCUT_TT, + PROBCUT_INIT, + PROBCUT, + + // generate qsearch moves + QSEARCH_TT, + QCAPTURE_INIT, + QCAPTURE, + QCHECK_INIT, + QCHECK +}; + +// Sort moves in descending order up to and including +// a given limit. The order of moves smaller than the limit is left unspecified. +void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p) if (p->value >= limit) { ExtMove tmp = *p, *q; - *p = *++sortedEnd; + *p = *++sortedEnd; for (q = sortedEnd; q != begin && *(q - 1) < tmp; --q) *q = *(q - 1); *q = tmp; } - } - -} // namespace - - -/// Constructors of the MovePicker class. As arguments we pass information -/// to help it to return the (presumably) good moves first, to decide which -/// moves to return (in the quiescence search, for instance, we only want to -/// search captures, promotions, and some checks) and how important good move -/// ordering is at the current node. - -/// MovePicker constructor for the main search -MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, - const CapturePieceToHistory* cph, - const PieceToHistory** ch, - Move cm, - const Move* killers) - : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), - ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d) -{ - assert(d > 0); - - stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + - !(ttm && pos.pseudo_legal(ttm)); } -/// MovePicker constructor for quiescence search -MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, - const CapturePieceToHistory* cph, - const PieceToHistory** ch, - Square rs) - : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) -{ - assert(d <= 0); +} // namespace - stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + - !( ttm - && (pos.checkers() || depth > DEPTH_QS_RECAPTURES || to_sq(ttm) == recaptureSquare) - && pos.pseudo_legal(ttm)); + +// Constructors of the MovePicker class. As arguments, we pass information +// to help it return the (presumably) good moves first, to decide which +// moves to return (in the quiescence search, for instance, we only want to +// search captures, promotions, and some checks) and how important a good +// move ordering is at the current node. + +// MovePicker constructor for the main search +MovePicker::MovePicker(const Position& p, + Move ttm, + Depth d, + const ButterflyHistory* mh, + const CapturePieceToHistory* cph, + const PieceToHistory** ch, + const PawnHistory* ph, + Move cm, + const Move* killers) : + pos(p), + mainHistory(mh), + captureHistory(cph), + continuationHistory(ch), + pawnHistory(ph), + ttMove(ttm), + refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, + depth(d) { + assert(d > 0); + + stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + !(ttm && pos.pseudo_legal(ttm)); } -/// MovePicker constructor for ProbCut: we generate captures with SEE greater -/// than or equal to the given threshold. -MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph) - : pos(p), captureHistory(cph), ttMove(ttm), threshold(th) -{ - assert(!pos.checkers()); +// Constructor for quiescence search +MovePicker::MovePicker(const Position& p, + Move ttm, + Depth d, + const ButterflyHistory* mh, + const CapturePieceToHistory* cph, + const PieceToHistory** ch, + const PawnHistory* ph) : + pos(p), + mainHistory(mh), + captureHistory(cph), + continuationHistory(ch), + pawnHistory(ph), + ttMove(ttm), + depth(d) { + assert(d <= 0); - stage = PROBCUT_TT + !(ttm && pos.capture(ttm) - && pos.pseudo_legal(ttm) - && pos.see_ge(ttm, threshold)); + stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm)); } -/// MovePicker::score() assigns a numerical value to each move in a list, used -/// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring -/// captures with a good history. Quiets moves are ordered using the histories. +// Constructor for ProbCut: we generate captures with SEE greater +// than or equal to the given threshold. +MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) : + pos(p), + captureHistory(cph), + ttMove(ttm), + threshold(th) { + assert(!pos.checkers()); + + stage = PROBCUT_TT + + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold)); +} + +// Assigns a numerical value to each move in a list, used +// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring +// captures with a good history. Quiets moves are ordered using the history tables. template void MovePicker::score() { - static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); + static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); - for (auto& m : *this) - if constexpr (Type == CAPTURES) - m.value = int(PieceValue[MG][pos.piece_on(to_sq(m))]) * 6 - + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]; + [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook, + threatenedPieces; + if constexpr (Type == QUIETS) + { + Color us = pos.side_to_move(); - else if constexpr (Type == QUIETS) - m.value = (*mainHistory)[pos.side_to_move()][from_to(m)] - + 2 * (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)] - + (*continuationHistory[1])[pos.moved_piece(m)][to_sq(m)] - + (*continuationHistory[3])[pos.moved_piece(m)][to_sq(m)] - + (*continuationHistory[5])[pos.moved_piece(m)][to_sq(m)]; + threatenedByPawn = pos.attacks_by(~us); + threatenedByMinor = + pos.attacks_by(~us) | pos.attacks_by(~us) | threatenedByPawn; + threatenedByRook = pos.attacks_by(~us) | threatenedByMinor; - else // Type == EVASIONS - { - if (pos.capture(m)) - m.value = PieceValue[MG][pos.piece_on(to_sq(m))] - - Value(type_of(pos.moved_piece(m))); - else - m.value = (*mainHistory)[pos.side_to_move()][from_to(m)] - + 2 * (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)] - - (1 << 28); - } + // Pieces threatened by pieces of lesser material value + threatenedPieces = (pos.pieces(us, QUEEN) & threatenedByRook) + | (pos.pieces(us, ROOK) & threatenedByMinor) + | (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn); + } + + for (auto& m : *this) + if constexpr (Type == CAPTURES) + m.value = + 7 * int(PieceValue[pos.piece_on(m.to_sq())]) + + (*captureHistory)[pos.moved_piece(m)][m.to_sq()][type_of(pos.piece_on(m.to_sq()))]; + + else if constexpr (Type == QUIETS) + { + Piece pc = pos.moved_piece(m); + PieceType pt = type_of(pc); + Square from = m.from_sq(); + Square to = m.to_sq(); + + // histories + m.value = 2 * (*mainHistory)[pos.side_to_move()][m.from_to()]; + m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to]; + m.value += 2 * (*continuationHistory[0])[pc][to]; + m.value += (*continuationHistory[1])[pc][to]; + m.value += (*continuationHistory[2])[pc][to] / 4; + m.value += (*continuationHistory[3])[pc][to]; + m.value += (*continuationHistory[5])[pc][to]; + + // bonus for checks + m.value += bool(pos.check_squares(pt) & to) * 16384; + + // bonus for escaping from capture + m.value += threatenedPieces & from ? (pt == QUEEN && !(to & threatenedByRook) ? 51700 + : pt == ROOK && !(to & threatenedByMinor) ? 25600 + : !(to & threatenedByPawn) ? 14450 + : 0) + : 0; + + // malus for putting piece en prise + m.value -= !(threatenedPieces & from) + ? (pt == QUEEN ? bool(to & threatenedByRook) * 48150 + + bool(to & threatenedByMinor) * 10650 + : pt == ROOK ? bool(to & threatenedByMinor) * 24335 + : pt != PAWN ? bool(to & threatenedByPawn) * 14950 + : 0) + : 0; + } + + else // Type == EVASIONS + { + if (pos.capture_stage(m)) + m.value = + PieceValue[pos.piece_on(m.to_sq())] - type_of(pos.moved_piece(m)) + (1 << 28); + else + m.value = (*mainHistory)[pos.side_to_move()][m.from_to()] + + (*continuationHistory[0])[pos.moved_piece(m)][m.to_sq()] + + (*pawnHistory)[pawn_structure_index(pos)][pos.moved_piece(m)][m.to_sq()]; + } } -/// MovePicker::select() returns the next move satisfying a predicate function. -/// It never returns the TT move. +// Returns the next move satisfying a predicate function. +// It never returns the TT move. template Move MovePicker::select(Pred filter) { - while (cur < endMoves) - { - if (T == Best) - std::swap(*cur, *std::max_element(cur, endMoves)); + while (cur < endMoves) + { + if constexpr (T == Best) + std::swap(*cur, *std::max_element(cur, endMoves)); - if (*cur != ttMove && filter()) - return *cur++; + if (*cur != ttMove && filter()) + return *cur++; - cur++; - } - return MOVE_NONE; + cur++; + } + return Move::none(); } -/// MovePicker::next_move() is the most important method of the MovePicker class. It -/// returns a new pseudo-legal move every time it is called until there are no more -/// moves left, picking the move with the highest score from a list of generated moves. +// Most important method of the MovePicker class. It +// returns a new pseudo-legal move every time it is called until there are no more +// moves left, picking the move with the highest score from a list of generated moves. Move MovePicker::next_move(bool skipQuiets) { + auto quiet_threshold = [](Depth d) { return -3560 * d; }; + top: - switch (stage) { + switch (stage) + { - case MAIN_TT: - case EVASION_TT: - case QSEARCH_TT: - case PROBCUT_TT: - ++stage; - return ttMove; + case MAIN_TT : + case EVASION_TT : + case QSEARCH_TT : + case PROBCUT_TT : + ++stage; + return ttMove; - case CAPTURE_INIT: - case PROBCUT_INIT: - case QCAPTURE_INIT: - cur = endBadCaptures = moves; - endMoves = generate(pos, cur); + case CAPTURE_INIT : + case PROBCUT_INIT : + case QCAPTURE_INIT : + cur = endBadCaptures = moves; + endMoves = generate(pos, cur); - score(); - ++stage; - goto top; + score(); + partial_insertion_sort(cur, endMoves, std::numeric_limits::min()); + ++stage; + goto top; - case GOOD_CAPTURE: - if (select([&](){ - return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ? - // Move losing capture to endBadCaptures to be tried later - true : (*endBadCaptures++ = *cur, false); })) - return *(cur - 1); + case GOOD_CAPTURE : + if (select([&]() { + // Move losing capture to endBadCaptures to be tried later + return pos.see_ge(*cur, -cur->value / 18) ? true + : (*endBadCaptures++ = *cur, false); + })) + return *(cur - 1); - // Prepare the pointers to loop over the refutations array - cur = std::begin(refutations); - endMoves = std::end(refutations); + // Prepare the pointers to loop over the refutations array + cur = std::begin(refutations); + endMoves = std::end(refutations); - // If the countermove is the same as a killer, skip it - if ( refutations[0].move == refutations[2].move - || refutations[1].move == refutations[2].move) - --endMoves; + // If the countermove is the same as a killer, skip it + if (refutations[0] == refutations[2] || refutations[1] == refutations[2]) + --endMoves; - ++stage; - [[fallthrough]]; + ++stage; + [[fallthrough]]; - case REFUTATION: - if (select([&](){ return *cur != MOVE_NONE - && !pos.capture(*cur) - && pos.pseudo_legal(*cur); })) - return *(cur - 1); - ++stage; - [[fallthrough]]; + case REFUTATION : + if (select([&]() { + return *cur != Move::none() && !pos.capture_stage(*cur) && pos.pseudo_legal(*cur); + })) + return *(cur - 1); + ++stage; + [[fallthrough]]; - case QUIET_INIT: - if (!skipQuiets) - { - cur = endBadCaptures; - endMoves = generate(pos, cur); + case QUIET_INIT : + if (!skipQuiets) + { + cur = endBadCaptures; + endMoves = beginBadQuiets = endBadQuiets = generate(pos, cur); - score(); - partial_insertion_sort(cur, endMoves, -3000 * depth); - } + score(); + partial_insertion_sort(cur, endMoves, quiet_threshold(depth)); + } - ++stage; - [[fallthrough]]; + ++stage; + [[fallthrough]]; - case QUIET: - if ( !skipQuiets - && select([&](){return *cur != refutations[0].move - && *cur != refutations[1].move - && *cur != refutations[2].move;})) - return *(cur - 1); + case GOOD_QUIET : + if (!skipQuiets && select([&]() { + return *cur != refutations[0] && *cur != refutations[1] && *cur != refutations[2]; + })) + { + if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth)) + return *(cur - 1); - // Prepare the pointers to loop over the bad captures - cur = moves; - endMoves = endBadCaptures; + // Remaining quiets are bad + beginBadQuiets = cur - 1; + } - ++stage; - [[fallthrough]]; + // Prepare the pointers to loop over the bad captures + cur = moves; + endMoves = endBadCaptures; - case BAD_CAPTURE: - return select([](){ return true; }); + ++stage; + [[fallthrough]]; - case EVASION_INIT: - cur = moves; - endMoves = generate(pos, cur); + case BAD_CAPTURE : + if (select([]() { return true; })) + return *(cur - 1); - score(); - ++stage; - [[fallthrough]]; + // Prepare the pointers to loop over the bad quiets + cur = beginBadQuiets; + endMoves = endBadQuiets; - case EVASION: - return select([](){ return true; }); + ++stage; + [[fallthrough]]; - case PROBCUT: - return select([&](){ return pos.see_ge(*cur, threshold); }); + case BAD_QUIET : + if (!skipQuiets) + return select([&]() { + return *cur != refutations[0] && *cur != refutations[1] && *cur != refutations[2]; + }); - case QCAPTURE: - if (select([&](){ return depth > DEPTH_QS_RECAPTURES - || to_sq(*cur) == recaptureSquare; })) - return *(cur - 1); + return Move::none(); - // If we did not find any move and we do not try checks, we have finished - if (depth != DEPTH_QS_CHECKS) - return MOVE_NONE; + case EVASION_INIT : + cur = moves; + endMoves = generate(pos, cur); - ++stage; - [[fallthrough]]; + score(); + ++stage; + [[fallthrough]]; - case QCHECK_INIT: - cur = moves; - endMoves = generate(pos, cur); + case EVASION : + return select([]() { return true; }); - ++stage; - [[fallthrough]]; + case PROBCUT : + return select([&]() { return pos.see_ge(*cur, threshold); }); - case QCHECK: - return select([](){ return true; }); - } + case QCAPTURE : + if (select([]() { return true; })) + return *(cur - 1); - assert(false); - return MOVE_NONE; // Silence warning + // If we did not find any move and we do not try checks, we have finished + if (depth != DEPTH_QS_CHECKS) + return Move::none(); + + ++stage; + [[fallthrough]]; + + case QCHECK_INIT : + cur = moves; + endMoves = generate(pos, cur); + + ++stage; + [[fallthrough]]; + + case QCHECK : + return select([]() { return true; }); + } + + assert(false); + return Move::none(); // Silence warning } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/movepick.h b/src/movepick.h index e2cbfcde..b81f76e1 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,9 +19,14 @@ #ifndef MOVEPICK_H_INCLUDED #define MOVEPICK_H_INCLUDED +#include #include +#include +#include +#include +#include #include -#include +#include // IWYU pragma: keep #include "movegen.h" #include "position.h" @@ -29,125 +34,169 @@ namespace Stockfish { -/// StatsEntry stores the stat table value. It is usually a number but could -/// be a move or even a nested history. We use a class instead of naked value -/// to directly call history update operator<<() on the entry so to use stats -/// tables at caller sites as simple multi-dim arrays. +constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2 +constexpr int CORRECTION_HISTORY_SIZE = 16384; // has to be a power of 2 +constexpr int CORRECTION_HISTORY_LIMIT = 1024; + +static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0, + "PAWN_HISTORY_SIZE has to be a power of 2"); + +static_assert((CORRECTION_HISTORY_SIZE & (CORRECTION_HISTORY_SIZE - 1)) == 0, + "CORRECTION_HISTORY_SIZE has to be a power of 2"); + +enum PawnHistoryType { + Normal, + Correction +}; + +template +inline int pawn_structure_index(const Position& pos) { + return pos.pawn_key() & ((T == Normal ? PAWN_HISTORY_SIZE : CORRECTION_HISTORY_SIZE) - 1); +} + +// StatsEntry stores the stat table value. It is usually a number but could +// be a move or even a nested history. We use a class instead of a naked value +// to directly call history update operator<<() on the entry so to use stats +// tables at caller sites as simple multi-dim arrays. template class StatsEntry { - T entry; + T entry; -public: - void operator=(const T& v) { entry = v; } - T* operator&() { return &entry; } - T* operator->() { return &entry; } - operator const T&() const { return entry; } + public: + void operator=(const T& v) { entry = v; } + T* operator&() { return &entry; } + T* operator->() { return &entry; } + operator const T&() const { return entry; } - void operator<<(int bonus) { - assert(abs(bonus) <= D); // Ensure range is [-D, D] - static_assert(D <= std::numeric_limits::max(), "D overflows T"); + void operator<<(int bonus) { + static_assert(D <= std::numeric_limits::max(), "D overflows T"); - entry += bonus - entry * abs(bonus) / D; + // Make sure that bonus is in range [-D, D] + int clampedBonus = std::clamp(bonus, -D, D); + entry += clampedBonus - entry * std::abs(clampedBonus) / D; - assert(abs(entry) <= D); - } + assert(std::abs(entry) <= D); + } }; -/// Stats is a generic N-dimensional array used to store various statistics. -/// The first template parameter T is the base type of the array, the second -/// template parameter D limits the range of updates in [-D, D] when we update -/// values with the << operator, while the last parameters (Size and Sizes) -/// encode the dimensions of the array. -template -struct Stats : public std::array, Size> -{ - typedef Stats stats; +// Stats is a generic N-dimensional array used to store various statistics. +// The first template parameter T is the base type of the array, and the second +// template parameter D limits the range of updates in [-D, D] when we update +// values with the << operator, while the last parameters (Size and Sizes) +// encode the dimensions of the array. +template +struct Stats: public std::array, Size> { + using stats = Stats; - void fill(const T& v) { + void fill(const T& v) { - // For standard-layout 'this' points to first struct member - assert(std::is_standard_layout::value); + // For standard-layout 'this' points to the first struct member + assert(std::is_standard_layout_v); - typedef StatsEntry entry; - entry* p = reinterpret_cast(this); - std::fill(p, p + sizeof(*this) / sizeof(entry), v); - } + using entry = StatsEntry; + entry* p = reinterpret_cast(this); + std::fill(p, p + sizeof(*this) / sizeof(entry), v); + } }; -template -struct Stats : public std::array, Size> {}; +template +struct Stats: public std::array, Size> {}; -/// In stats table, D=0 means that the template parameter is not used -enum StatsParams { NOT_USED = 0 }; -enum StatsType { NoCaptures, Captures }; +// In stats table, D=0 means that the template parameter is not used +enum StatsParams { + NOT_USED = 0 +}; +enum StatsType { + NoCaptures, + Captures +}; -/// ButterflyHistory records how often quiet moves have been successful or -/// unsuccessful during the current search, and is used for reduction and move -/// ordering decisions. It uses 2 tables (one for each color) indexed by -/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards -typedef Stats ButterflyHistory; +// ButterflyHistory records how often quiet moves have been successful or unsuccessful +// during the current search, and is used for reduction and move ordering decisions. +// It uses 2 tables (one for each color) indexed by the move's from and to squares, +// see www.chessprogramming.org/Butterfly_Boards (~11 elo) +using ButterflyHistory = Stats; -/// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous -/// move, see www.chessprogramming.org/Countermove_Heuristic -typedef Stats CounterMoveHistory; +// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous +// move, see www.chessprogramming.org/Countermove_Heuristic +using CounterMoveHistory = Stats; -/// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] -typedef Stats CapturePieceToHistory; +// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] +using CapturePieceToHistory = Stats; -/// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to] -typedef Stats PieceToHistory; +// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to] +using PieceToHistory = Stats; -/// ContinuationHistory is the combined history of a given pair of moves, usually -/// the current one given a previous one. The nested history table is based on -/// PieceToHistory instead of ButterflyBoards. -typedef Stats ContinuationHistory; +// ContinuationHistory is the combined history of a given pair of moves, usually +// the current one given a previous one. The nested history table is based on +// PieceToHistory instead of ButterflyBoards. +// (~63 elo) +using ContinuationHistory = Stats; +// PawnHistory is addressed by the pawn structure and a move's [piece][to] +using PawnHistory = Stats; -/// MovePicker class is used to pick one pseudo-legal move at a time from the -/// current position. The most important method is next_move(), which returns a -/// new pseudo-legal move each time it is called, until there are no moves left, -/// when MOVE_NONE is returned. In order to improve the efficiency of the -/// alpha-beta algorithm, MovePicker attempts to return the moves which are most -/// likely to get a cut-off first. +// CorrectionHistory is addressed by color and pawn structure +using CorrectionHistory = + Stats; + +// MovePicker class is used to pick one pseudo-legal move at a time from the +// current position. The most important method is next_move(), which returns a +// new pseudo-legal move each time it is called, until there are no moves left, +// when Move::none() is returned. In order to improve the efficiency of the +// alpha-beta algorithm, MovePicker attempts to return the moves which are most +// likely to get a cut-off first. class MovePicker { - enum PickType { Next, Best }; + enum PickType { + Next, + Best + }; -public: - MovePicker(const MovePicker&) = delete; - MovePicker& operator=(const MovePicker&) = delete; - MovePicker(const Position&, Move, Depth, const ButterflyHistory*, - const CapturePieceToHistory*, - const PieceToHistory**, - Move, - const Move*); - MovePicker(const Position&, Move, Depth, const ButterflyHistory*, - const CapturePieceToHistory*, - const PieceToHistory**, - Square); - MovePicker(const Position&, Move, Value, const CapturePieceToHistory*); - Move next_move(bool skipQuiets = false); + public: + MovePicker(const MovePicker&) = delete; + MovePicker& operator=(const MovePicker&) = delete; + MovePicker(const Position&, + Move, + Depth, + const ButterflyHistory*, + const CapturePieceToHistory*, + const PieceToHistory**, + const PawnHistory*, + Move, + const Move*); + MovePicker(const Position&, + Move, + Depth, + const ButterflyHistory*, + const CapturePieceToHistory*, + const PieceToHistory**, + const PawnHistory*); + MovePicker(const Position&, Move, int, const CapturePieceToHistory*); + Move next_move(bool skipQuiets = false); -private: - template Move select(Pred); - template void score(); - ExtMove* begin() { return cur; } - ExtMove* end() { return endMoves; } + private: + template + Move select(Pred); + template + void score(); + ExtMove* begin() { return cur; } + ExtMove* end() { return endMoves; } - const Position& pos; - const ButterflyHistory* mainHistory; - const CapturePieceToHistory* captureHistory; - const PieceToHistory** continuationHistory; - Move ttMove; - ExtMove refutations[3], *cur, *endMoves, *endBadCaptures; - int stage; - Square recaptureSquare; - Value threshold; - Depth depth; - ExtMove moves[MAX_MOVES]; + const Position& pos; + const ButterflyHistory* mainHistory; + const CapturePieceToHistory* captureHistory; + const PieceToHistory** continuationHistory; + const PawnHistory* pawnHistory; + Move ttMove; + ExtMove refutations[3], *cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets; + int stage; + int threshold; + Depth depth; + ExtMove moves[MAX_MOVES]; }; -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef MOVEPICK_H_INCLUDED +#endif // #ifndef MOVEPICK_H_INCLUDED diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp deleted file mode 100644 index 0fd58462..00000000 --- a/src/nnue/evaluate_nnue.cpp +++ /dev/null @@ -1,403 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// Code for calculating NNUE evaluation function - -#include -#include -#include -#include -#include - -#include "../evaluate.h" -#include "../position.h" -#include "../misc.h" -#include "../uci.h" -#include "../types.h" - -#include "evaluate_nnue.h" - -namespace Stockfish::Eval::NNUE { - - // Input feature converter - LargePagePtr featureTransformer; - - // Evaluation function - AlignedPtr network[LayerStacks]; - - // Evaluation function file name - std::string fileName; - std::string netDescription; - - namespace Detail { - - // Initialize the evaluation function parameters - template - void initialize(AlignedPtr& pointer) { - - pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); - std::memset(pointer.get(), 0, sizeof(T)); - } - - template - void initialize(LargePagePtr& pointer) { - - static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); - pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); - std::memset(pointer.get(), 0, sizeof(T)); - } - - // Read evaluation function parameters - template - bool read_parameters(std::istream& stream, T& reference) { - - std::uint32_t header; - header = read_little_endian(stream); - if (!stream || header != T::get_hash_value()) return false; - return reference.read_parameters(stream); - } - - // Write evaluation function parameters - template - bool write_parameters(std::ostream& stream, const T& reference) { - - write_little_endian(stream, T::get_hash_value()); - return reference.write_parameters(stream); - } - - } // namespace Detail - - // Initialize the evaluation function parameters - void initialize() { - - Detail::initialize(featureTransformer); - for (std::size_t i = 0; i < LayerStacks; ++i) - Detail::initialize(network[i]); - } - - // Read network header - bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) - { - std::uint32_t version, size; - - version = read_little_endian(stream); - *hashValue = read_little_endian(stream); - size = read_little_endian(stream); - if (!stream || version != Version) return false; - desc->resize(size); - stream.read(&(*desc)[0], size); - return !stream.fail(); - } - - // Write network header - bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) - { - write_little_endian(stream, Version); - write_little_endian(stream, hashValue); - write_little_endian(stream, desc.size()); - stream.write(&desc[0], desc.size()); - return !stream.fail(); - } - - // Read network parameters - bool read_parameters(std::istream& stream) { - - std::uint32_t hashValue; - if (!read_header(stream, &hashValue, &netDescription)) return false; - if (hashValue != HashValue) return false; - if (!Detail::read_parameters(stream, *featureTransformer)) return false; - for (std::size_t i = 0; i < LayerStacks; ++i) - if (!Detail::read_parameters(stream, *(network[i]))) return false; - return stream && stream.peek() == std::ios::traits_type::eof(); - } - - // Write network parameters - bool write_parameters(std::ostream& stream) { - - if (!write_header(stream, HashValue, netDescription)) return false; - if (!Detail::write_parameters(stream, *featureTransformer)) return false; - for (std::size_t i = 0; i < LayerStacks; ++i) - if (!Detail::write_parameters(stream, *(network[i]))) return false; - return (bool)stream; - } - - // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos, bool adjusted) { - - // We manually align the arrays on the stack because with gcc < 9.3 - // overaligning stack variables with alignas() doesn't work correctly. - - constexpr uint64_t alignment = CacheLineSize; - int delta = 7; - -#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) - TransformedFeatureType transformedFeaturesUnaligned[ - FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; - - auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); -#else - alignas(alignment) - TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; -#endif - - ASSERT_ALIGNED(transformedFeatures, alignment); - - const std::size_t bucket = (pos.count() - 1) / 4; - const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); - const auto positional = network[bucket]->propagate(transformedFeatures); - - // Give more value to positional evaluation when adjusted flag is set - if (adjusted) - return static_cast(((128 - delta) * psqt + (128 + delta) * positional) / 128 / OutputScale); - else - return static_cast((psqt + positional) / OutputScale); - } - - struct NnueEvalTrace { - static_assert(LayerStacks == PSQTBuckets); - - Value psqt[LayerStacks]; - Value positional[LayerStacks]; - std::size_t correctBucket; - }; - - static NnueEvalTrace trace_evaluate(const Position& pos) { - - // We manually align the arrays on the stack because with gcc < 9.3 - // overaligning stack variables with alignas() doesn't work correctly. - - constexpr uint64_t alignment = CacheLineSize; - -#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) - TransformedFeatureType transformedFeaturesUnaligned[ - FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; - - auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); -#else - alignas(alignment) - TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; -#endif - - ASSERT_ALIGNED(transformedFeatures, alignment); - - NnueEvalTrace t{}; - t.correctBucket = (pos.count() - 1) / 4; - for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) { - const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket); - const auto positional = network[bucket]->propagate(transformedFeatures); - - t.psqt[bucket] = static_cast( materialist / OutputScale ); - t.positional[bucket] = static_cast( positional / OutputScale ); - } - - return t; - } - - static const std::string PieceToChar(" PNBRQK pnbrqk"); - - - // format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer. - // The buffer must have capacity for at least 5 chars. - static void format_cp_compact(Value v, char* buffer) { - - buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); - - int cp = std::abs(100 * v / PawnValueEg); - if (cp >= 10000) - { - buffer[1] = '0' + cp / 10000; cp %= 10000; - buffer[2] = '0' + cp / 1000; cp %= 1000; - buffer[3] = '0' + cp / 100; - buffer[4] = ' '; - } - else if (cp >= 1000) - { - buffer[1] = '0' + cp / 1000; cp %= 1000; - buffer[2] = '0' + cp / 100; cp %= 100; - buffer[3] = '.'; - buffer[4] = '0' + cp / 10; - } - else - { - buffer[1] = '0' + cp / 100; cp %= 100; - buffer[2] = '.'; - buffer[3] = '0' + cp / 10; cp %= 10; - buffer[4] = '0' + cp / 1; - } - } - - - // format_cp_aligned_dot() converts a Value into (centi)pawns and writes it in a buffer, - // always keeping two decimals. The buffer must have capacity for at least 7 chars. - static void format_cp_aligned_dot(Value v, char* buffer) { - - buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); - - double cp = 1.0 * std::abs(int(v)) / PawnValueEg; - sprintf(&buffer[1], "%6.2f", cp); - } - - - // trace() returns a string with the value of each piece on a board, - // and a table for (PSQT, Layers) values bucket by bucket. - - std::string trace(Position& pos) { - - std::stringstream ss; - - char board[3*8+1][8*8+2]; - std::memset(board, ' ', sizeof(board)); - for (int row = 0; row < 3*8+1; ++row) - board[row][8*8+1] = '\0'; - - // A lambda to output one box of the board - auto writeSquare = [&board](File file, Rank rank, Piece pc, Value value) { - - const int x = ((int)file) * 8; - const int y = (7 - (int)rank) * 3; - for (int i = 1; i < 8; ++i) - board[y][x+i] = board[y+3][x+i] = '-'; - for (int i = 1; i < 3; ++i) - board[y+i][x] = board[y+i][x+8] = '|'; - board[y][x] = board[y][x+8] = board[y+3][x+8] = board[y+3][x] = '+'; - if (pc != NO_PIECE) - board[y+1][x+4] = PieceToChar[pc]; - if (value != VALUE_NONE) - format_cp_compact(value, &board[y+2][x+2]); - }; - - // We estimate the value of each piece by doing a differential evaluation from - // the current base eval, simulating the removal of the piece from its square. - Value base = evaluate(pos); - base = pos.side_to_move() == WHITE ? base : -base; - - for (File f = FILE_A; f <= FILE_H; ++f) - for (Rank r = RANK_1; r <= RANK_8; ++r) - { - Square sq = make_square(f, r); - Piece pc = pos.piece_on(sq); - Value v = VALUE_NONE; - - if (pc != NO_PIECE && type_of(pc) != KING) - { - auto st = pos.state(); - - pos.remove_piece(sq); - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; - - Value eval = evaluate(pos); - eval = pos.side_to_move() == WHITE ? eval : -eval; - v = base - eval; - - pos.put_piece(pc, sq); - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; - } - - writeSquare(f, r, pc, v); - } - - ss << " NNUE derived piece values:\n"; - for (int row = 0; row < 3*8+1; ++row) - ss << board[row] << '\n'; - ss << '\n'; - - auto t = trace_evaluate(pos); - - ss << " NNUE network contributions " - << (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl - << "+------------+------------+------------+------------+\n" - << "| Bucket | Material | Positional | Total |\n" - << "| | (PSQT) | (Layers) | |\n" - << "+------------+------------+------------+------------+\n"; - - for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) - { - char buffer[3][8]; - std::memset(buffer, '\0', sizeof(buffer)); - - format_cp_aligned_dot(t.psqt[bucket], buffer[0]); - format_cp_aligned_dot(t.positional[bucket], buffer[1]); - format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], buffer[2]); - - ss << "| " << bucket << " " - << " | " << buffer[0] << " " - << " | " << buffer[1] << " " - << " | " << buffer[2] << " " - << " |"; - if (bucket == t.correctBucket) - ss << " <-- this bucket is used"; - ss << '\n'; - } - - ss << "+------------+------------+------------+------------+\n"; - - return ss.str(); - } - - - // Load eval, from a file stream or a memory stream - bool load_eval(std::string name, std::istream& stream) { - - initialize(); - fileName = name; - return read_parameters(stream); - } - - // Save eval, to a file stream or a memory stream - bool save_eval(std::ostream& stream) { - - if (fileName.empty()) - return false; - - return write_parameters(stream); - } - - /// Save eval, to a file given by its name - bool save_eval(const std::optional& filename) { - - std::string actualFilename; - std::string msg; - - if (filename.has_value()) - actualFilename = filename.value(); - else - { - if (currentEvalFileName != EvalFileDefaultName) - { - msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified"; - - sync_cout << msg << sync_endl; - return false; - } - actualFilename = EvalFileDefaultName; - } - - std::ofstream stream(actualFilename, std::ios_base::binary); - bool saved = save_eval(stream); - - msg = saved ? "Network saved successfully to " + actualFilename - : "Failed to export a net"; - - sync_cout << msg << sync_endl; - return saved; - } - - -} // namespace Stockfish::Eval::NNUE diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h deleted file mode 100644 index 2e4f1f50..00000000 --- a/src/nnue/evaluate_nnue.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// header used in NNUE evaluation function - -#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED -#define NNUE_EVALUATE_NNUE_H_INCLUDED - -#include "nnue_feature_transformer.h" - -#include - -namespace Stockfish::Eval::NNUE { - - // Hash value of evaluation function structure - constexpr std::uint32_t HashValue = - FeatureTransformer::get_hash_value() ^ Network::get_hash_value(); - - // Deleter for automating release of memory area - template - struct AlignedDeleter { - void operator()(T* ptr) const { - ptr->~T(); - std_aligned_free(ptr); - } - }; - - template - struct LargePageDeleter { - void operator()(T* ptr) const { - ptr->~T(); - aligned_large_pages_free(ptr); - } - }; - - template - using AlignedPtr = std::unique_ptr>; - - template - using LargePagePtr = std::unique_ptr>; - -} // namespace Stockfish::Eval::NNUE - -#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED diff --git a/src/nnue/features/half_ka_v2_hm.cpp b/src/nnue/features/half_ka_v2_hm.cpp index 07a1d7a1..71782a7b 100644 --- a/src/nnue/features/half_ka_v2_hm.cpp +++ b/src/nnue/features/half_ka_v2_hm.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,64 +20,69 @@ #include "half_ka_v2_hm.h" +#include "../../bitboard.h" #include "../../position.h" +#include "../../types.h" +#include "../nnue_accumulator.h" namespace Stockfish::Eval::NNUE::Features { - // Orient a square according to perspective (rotates by 180 for black) - inline Square HalfKAv2_hm::orient(Color perspective, Square s, Square ksq) { - return Square(int(s) ^ (bool(perspective) * SQ_A8) ^ ((file_of(ksq) < FILE_E) * SQ_H1)); - } +// Index of a feature for a given king position and another piece on some square +template +inline IndexType HalfKAv2_hm::make_index(Square s, Piece pc, Square ksq) { + return IndexType((int(s) ^ OrientTBL[Perspective][ksq]) + PieceSquareIndex[Perspective][pc] + + KingBuckets[Perspective][ksq]); +} - // Index of a feature for a given king position and another piece on some square - inline IndexType HalfKAv2_hm::make_index(Color perspective, Square s, Piece pc, Square ksq) { - Square o_ksq = orient(perspective, ksq, ksq); - return IndexType(orient(perspective, s, ksq) + PieceSquareIndex[perspective][pc] + PS_NB * KingBuckets[o_ksq]); - } - - // Get a list of indices for active features - void HalfKAv2_hm::append_active_indices( - const Position& pos, - Color perspective, - IndexList& active - ) { - Square ksq = pos.square(perspective); - Bitboard bb = pos.pieces(); +// Get a list of indices for active features +template +void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active) { + Square ksq = pos.square(Perspective); + Bitboard bb = pos.pieces(); while (bb) { - Square s = pop_lsb(bb); - active.push_back(make_index(perspective, s, pos.piece_on(s), ksq)); + Square s = pop_lsb(bb); + active.push_back(make_index(s, pos.piece_on(s), ksq)); } - } +} +// Explicit template instantiations +template void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active); +template void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active); +template IndexType HalfKAv2_hm::make_index(Square s, Piece pc, Square ksq); +template IndexType HalfKAv2_hm::make_index(Square s, Piece pc, Square ksq); - // append_changed_indices() : get a list of indices for recently changed features - - void HalfKAv2_hm::append_changed_indices( - Square ksq, - const DirtyPiece& dp, - Color perspective, - IndexList& removed, - IndexList& added - ) { - for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.from[i] != SQ_NONE) - removed.push_back(make_index(perspective, dp.from[i], dp.piece[i], ksq)); - if (dp.to[i] != SQ_NONE) - added.push_back(make_index(perspective, dp.to[i], dp.piece[i], ksq)); +// Get a list of indices for recently changed features +template +void HalfKAv2_hm::append_changed_indices(Square ksq, + const DirtyPiece& dp, + IndexList& removed, + IndexList& added) { + for (int i = 0; i < dp.dirty_num; ++i) + { + if (dp.from[i] != SQ_NONE) + removed.push_back(make_index(dp.from[i], dp.piece[i], ksq)); + if (dp.to[i] != SQ_NONE) + added.push_back(make_index(dp.to[i], dp.piece[i], ksq)); } - } +} - int HalfKAv2_hm::update_cost(const StateInfo* st) { - return st->dirtyPiece.dirty_num; - } +// Explicit template instantiations +template void HalfKAv2_hm::append_changed_indices(Square ksq, + const DirtyPiece& dp, + IndexList& removed, + IndexList& added); +template void HalfKAv2_hm::append_changed_indices(Square ksq, + const DirtyPiece& dp, + IndexList& removed, + IndexList& added); - int HalfKAv2_hm::refresh_cost(const Position& pos) { - return pos.count(); - } +int HalfKAv2_hm::update_cost(const StateInfo* st) { return st->dirtyPiece.dirty_num; } - bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) { +int HalfKAv2_hm::refresh_cost(const Position& pos) { return pos.count(); } + +bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) { return st->dirtyPiece.piece[0] == make_piece(perspective, KING); - } +} } // namespace Stockfish::Eval::NNUE::Features diff --git a/src/nnue/features/half_ka_v2_hm.h b/src/nnue/features/half_ka_v2_hm.h index 1e6da0bf..96349704 100644 --- a/src/nnue/features/half_ka_v2_hm.h +++ b/src/nnue/features/half_ka_v2_hm.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,52 +21,47 @@ #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED #define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED +#include + +#include "../../misc.h" +#include "../../types.h" #include "../nnue_common.h" -#include "../../evaluate.h" -#include "../../misc.h" - namespace Stockfish { - struct StateInfo; +struct StateInfo; +class Position; } namespace Stockfish::Eval::NNUE::Features { - // Feature HalfKAv2_hm: Combination of the position of own king - // and the position of pieces. Position mirrored such that king always on e..h files. - class HalfKAv2_hm { +// Feature HalfKAv2_hm: Combination of the position of own king and the +// position of pieces. Position mirrored such that king is always on e..h files. +class HalfKAv2_hm { - // unique number for each piece type on each square + // Unique number for each piece type on each square enum { - PS_NONE = 0, - PS_W_PAWN = 0, - PS_B_PAWN = 1 * SQUARE_NB, - PS_W_KNIGHT = 2 * SQUARE_NB, - PS_B_KNIGHT = 3 * SQUARE_NB, - PS_W_BISHOP = 4 * SQUARE_NB, - PS_B_BISHOP = 5 * SQUARE_NB, - PS_W_ROOK = 6 * SQUARE_NB, - PS_B_ROOK = 7 * SQUARE_NB, - PS_W_QUEEN = 8 * SQUARE_NB, - PS_B_QUEEN = 9 * SQUARE_NB, - PS_KING = 10 * SQUARE_NB, - PS_NB = 11 * SQUARE_NB + PS_NONE = 0, + PS_W_PAWN = 0, + PS_B_PAWN = 1 * SQUARE_NB, + PS_W_KNIGHT = 2 * SQUARE_NB, + PS_B_KNIGHT = 3 * SQUARE_NB, + PS_W_BISHOP = 4 * SQUARE_NB, + PS_B_BISHOP = 5 * SQUARE_NB, + PS_W_ROOK = 6 * SQUARE_NB, + PS_B_ROOK = 7 * SQUARE_NB, + PS_W_QUEEN = 8 * SQUARE_NB, + PS_B_QUEEN = 9 * SQUARE_NB, + PS_KING = 10 * SQUARE_NB, + PS_NB = 11 * SQUARE_NB }; static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = { - // convention: W - us, B - them - // viewed from other side, W and B are reversed - { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE, - PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE }, - { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE, - PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE } - }; - - // Orient a square according to perspective (rotates by 180 for black) - static Square orient(Color perspective, Square s, Square ksq); - - // Index of a feature for a given king position and another piece on some square - static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq); + // Convention: W - us, B - them + // Viewed from other side, W and B are reversed + {PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE}, + {PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}}; public: // Feature name @@ -77,48 +72,79 @@ namespace Stockfish::Eval::NNUE::Features { // Number of feature dimensions static constexpr IndexType Dimensions = - static_cast(SQUARE_NB) * static_cast(PS_NB) / 2; + static_cast(SQUARE_NB) * static_cast(PS_NB) / 2; - static constexpr int KingBuckets[64] = { - -1, -1, -1, -1, 31, 30, 29, 28, - -1, -1, -1, -1, 27, 26, 25, 24, - -1, -1, -1, -1, 23, 22, 21, 20, - -1, -1, -1, -1, 19, 18, 17, 16, - -1, -1, -1, -1, 15, 14, 13, 12, - -1, -1, -1, -1, 11, 10, 9, 8, - -1, -1, -1, -1, 7, 6, 5, 4, - -1, -1, -1, -1, 3, 2, 1, 0 +#define B(v) (v * PS_NB) + // clang-format off + static constexpr int KingBuckets[COLOR_NB][SQUARE_NB] = { + { B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28), + B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24), + B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20), + B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16), + B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12), + B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8), + B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4), + B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0) }, + { B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0), + B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4), + B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8), + B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12), + B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16), + B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20), + B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24), + B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28) } }; + // clang-format on +#undef B + // clang-format off + // Orient a square according to perspective (rotates by 180 for black) + static constexpr int OrientTBL[COLOR_NB][SQUARE_NB] = { + { SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1 }, + { SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8 } + }; + // clang-format on // Maximum number of simultaneously active features. static constexpr IndexType MaxActiveDimensions = 32; - using IndexList = ValueList; + using IndexList = ValueList; + + // Index of a feature for a given king position and another piece on some square + template + static IndexType make_index(Square s, Piece pc, Square ksq); // Get a list of indices for active features - static void append_active_indices( - const Position& pos, - Color perspective, - IndexList& active); + template + static void append_active_indices(const Position& pos, IndexList& active); // Get a list of indices for recently changed features - static void append_changed_indices( - Square ksq, - const DirtyPiece& dp, - Color perspective, - IndexList& removed, - IndexList& added - ); + template + static void + append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added); // Returns the cost of updating one perspective, the most costly one. // Assumes no refresh needed. static int update_cost(const StateInfo* st); static int refresh_cost(const Position& pos); - // Returns whether the change stored in this StateInfo means that - // a full accumulator refresh is required. + // Returns whether the change stored in this StateInfo means + // that a full accumulator refresh is required. static bool requires_refresh(const StateInfo* st, Color perspective); - }; +}; } // namespace Stockfish::Eval::NNUE::Features -#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED +#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 22451915..ad9167c0 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,34 +21,18 @@ #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED #define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED +#include #include -#include -#include + #include "../nnue_common.h" -#include "../../simd.h" +#include "simd.h" /* This file contains the definition for a fully connected layer (aka affine transform). - Two approaches are employed, depending on the sizes of the transform. - Approach 1: - - used when the PaddedInputDimensions >= 128 - - uses AVX512 if possible - - processes inputs in batches of 2*InputSimdWidth - - so in batches of 128 for AVX512 - - the weight blocks of size InputSimdWidth are transposed such that - access is sequential - - N columns of the weight matrix are processed a time, where N - depends on the architecture (the amount of registers) - - accumulate + hadd is used - - Approach 2: - - used when the PaddedInputDimensions < 128 - - does not use AVX512 - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32. - that's why AVX512 is hard to implement - expected use-case is small layers - - not optimized as well as the approach 1 - inputs are processed in chunks of 4, weights are respectively transposed - accumulation happens directly to int32s */ @@ -56,111 +40,104 @@ namespace Stockfish::Eval::NNUE::Layers { // Fallback implementation for older/other architectures. -// Identical for both approaches. Requires the input to be padded to at least 16 values. +// Requires the input to be padded to at least 16 values. #if !defined(USE_SSSE3) - template - static void affine_transform_non_ssse3(std::int32_t* output, const std::int8_t* weights, const std::int32_t* biases, const std::uint8_t* input) - { -# if defined(USE_SSE2) +template +static void affine_transform_non_ssse3(std::int32_t* output, + const std::int8_t* weights, + const std::int32_t* biases, + const std::uint8_t* input) { + #if defined(USE_SSE2) || defined(USE_NEON_DOTPROD) || defined(USE_NEON) + #if defined(USE_SSE2) // At least a multiple of 16, with SSE2. - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; - const __m128i Zeros = _mm_setzero_si128(); - const auto inputVector = reinterpret_cast(input); + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const __m128i Zeros = _mm_setzero_si128(); + const auto inputVector = reinterpret_cast(input); -# elif defined(USE_MMX) - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 8; - const __m64 Zeros = _mm_setzero_si64(); - const auto inputVector = reinterpret_cast(input); + #elif defined(USE_NEON_DOTPROD) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const auto inputVector = reinterpret_cast(input); -# elif defined(USE_NEON) - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; - const auto inputVector = reinterpret_cast(input); -# endif + #elif defined(USE_NEON) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const auto inputVector = reinterpret_cast(input); + #endif - for (IndexType i = 0; i < OutputDimensions; ++i) { - const IndexType offset = i * PaddedInputDimensions; + for (IndexType i = 0; i < OutputDimensions; ++i) + { + const IndexType offset = i * PaddedInputDimensions; -# if defined(USE_SSE2) - __m128i sumLo = _mm_cvtsi32_si128(biases[i]); - __m128i sumHi = Zeros; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) { - __m128i row_j = _mm_load_si128(&row[j]); - __m128i input_j = _mm_load_si128(&inputVector[j]); - __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); - __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); - __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); - __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); - __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); - __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); - sumLo = _mm_add_epi32(sumLo, productLo); - sumHi = _mm_add_epi32(sumHi, productHi); - } - __m128i sum = _mm_add_epi32(sumLo, sumHi); - __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); - sum = _mm_add_epi32(sum, sumHigh_64); - __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); - sum = _mm_add_epi32(sum, sum_second_32); - output[i] = _mm_cvtsi128_si32(sum); + #if defined(USE_SSE2) + __m128i sumLo = _mm_cvtsi32_si128(biases[i]); + __m128i sumHi = Zeros; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) + { + __m128i row_j = _mm_load_si128(&row[j]); + __m128i input_j = _mm_load_si128(&inputVector[j]); + __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); + __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); + __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); + __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); + __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); + __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); + sumLo = _mm_add_epi32(sumLo, productLo); + sumHi = _mm_add_epi32(sumHi, productHi); + } + __m128i sum = _mm_add_epi32(sumLo, sumHi); + __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sumHigh_64); + __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sum_second_32); + output[i] = _mm_cvtsi128_si32(sum); -# elif defined(USE_MMX) - __m64 sumLo = _mm_cvtsi32_si64(biases[i]); - __m64 sumHi = Zeros; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) { - __m64 row_j = row[j]; - __m64 input_j = inputVector[j]; - __m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8); - __m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8); - __m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros); - __m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros); - __m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo); - __m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi); - sumLo = _mm_add_pi32(sumLo, productLo); - sumHi = _mm_add_pi32(sumHi, productHi); - } - __m64 sum = _mm_add_pi32(sumLo, sumHi); - sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum)); - output[i] = _mm_cvtsi64_si32(sum); + #elif defined(USE_NEON_DOTPROD) + int32x4_t sum = {biases[i]}; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) + { + sum = vdotq_s32(sum, inputVector[j], row[j]); + } + output[i] = vaddvq_s32(sum); -# elif defined(USE_NEON) - int32x4_t sum = {biases[i]}; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) { - int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]); - product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); - sum = vpadalq_s16(sum, product); - } - output[i] = sum[0] + sum[1] + sum[2] + sum[3]; + #elif defined(USE_NEON) + int32x4_t sum = {biases[i]}; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) + { + int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]); + product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); + sum = vpadalq_s16(sum, product); + } + output[i] = sum[0] + sum[1] + sum[2] + sum[3]; -# else - std::int32_t sum = biases[i]; - for (IndexType j = 0; j < InputDimensions; ++j) { - sum += weights[offset + j] * input[j]; - } - output[i] = sum; -# endif + #endif } + #else + std::memcpy(output, biases, sizeof(std::int32_t) * OutputDimensions); -# if defined(USE_MMX) - _mm_empty(); -# endif - } + // Traverse weights in transpose order to take advantage of input sparsity + for (IndexType i = 0; i < InputDimensions; ++i) + if (input[i]) + { + const std::int8_t* w = &weights[i]; + const int in = input[i]; + for (IndexType j = 0; j < OutputDimensions; ++j) + output[j] += w[j * PaddedInputDimensions] * in; + } + #endif +} #endif - template - class AffineTransform; - - // A specialization for large inputs. - template - class AffineTransform(InDims, MaxSimdWidth) >= 2*64)>> { +template +class AffineTransform { public: // Input/output type - using InputType = std::uint8_t; + using InputType = std::uint8_t; using OutputType = std::int32_t; // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = OutDims; static constexpr IndexType PaddedInputDimensions = @@ -170,370 +147,162 @@ namespace Stockfish::Eval::NNUE::Layers { using OutputBuffer = OutputType[PaddedOutputDimensions]; - static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen."); - -#if defined (USE_AVX512) - static constexpr const IndexType InputSimdWidth = 64; - static constexpr const IndexType MaxNumOutputRegs = 16; -#elif defined (USE_AVX2) - static constexpr const IndexType InputSimdWidth = 32; - static constexpr const IndexType MaxNumOutputRegs = 8; -#elif defined (USE_SSSE3) - static constexpr const IndexType InputSimdWidth = 16; - static constexpr const IndexType MaxNumOutputRegs = 8; -#elif defined (USE_NEON) - static constexpr const IndexType InputSimdWidth = 8; - static constexpr const IndexType MaxNumOutputRegs = 8; -#else - // The fallback implementation will not have permuted weights. - // We define these to avoid a lot of ifdefs later. - static constexpr const IndexType InputSimdWidth = 1; - static constexpr const IndexType MaxNumOutputRegs = 1; -#endif - - // A big block is a region in the weight matrix of the size [PaddedInputDimensions, NumOutputRegs]. - // A small block is a region of size [InputSimdWidth, 1] - - static constexpr const IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions); - static constexpr const IndexType SmallBlockSize = InputSimdWidth; - static constexpr const IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions; - static constexpr const IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize; - static constexpr const IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize; - static constexpr const IndexType NumBigBlocks = OutputDimensions / NumOutputRegs; - - static_assert(OutputDimensions % NumOutputRegs == 0); - // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0xCC03DAE4u; - hashValue += OutputDimensions; - hashValue ^= prevHash >> 1; - hashValue ^= prevHash << 31; - return hashValue; + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; + return hashValue; } - /* - Transposes the small blocks within a block. - Effectively means that weights can be traversed sequentially during inference. - */ - static IndexType get_weight_index(IndexType i) - { - const IndexType smallBlock = (i / SmallBlockSize) % NumSmallBlocksInBigBlock; - const IndexType smallBlockCol = smallBlock / NumSmallBlocksPerOutput; - const IndexType smallBlockRow = smallBlock % NumSmallBlocksPerOutput; - const IndexType bigBlock = i / BigBlockSize; - const IndexType rest = i % SmallBlockSize; - - const IndexType idx = - bigBlock * BigBlockSize - + smallBlockRow * SmallBlockSize * NumOutputRegs - + smallBlockCol * SmallBlockSize - + rest; - - return idx; + static constexpr IndexType get_weight_index_scrambled(IndexType i) { + return (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + + i / PaddedInputDimensions * 4 + i % 4; } - // Read network parameters - bool read_parameters(std::istream& stream) { - for (std::size_t i = 0; i < OutputDimensions; ++i) - biases[i] = read_little_endian(stream); - - for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - weights[get_weight_index(i)] = read_little_endian(stream); - - return !stream.fail(); - } - - // Write network parameters - bool write_parameters(std::ostream& stream) const { - for (std::size_t i = 0; i < OutputDimensions; ++i) - write_little_endian(stream, biases[i]); - - for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - write_little_endian(stream, weights[get_weight_index(i)]); - - return !stream.fail(); - } - - // Forward propagation - const OutputType* propagate( - const InputType* input, OutputType* output) const { - -#if defined (USE_AVX512) - using acc_vec_t = __m512i; - using bias_vec_t = __m128i; - using weight_vec_t = __m512i; - using in_vec_t = __m512i; - #define vec_zero _mm512_setzero_si512() - #define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2 - #define vec_hadd Simd::m512_hadd - #define vec_haddx4 Simd::m512_haddx4 -#elif defined (USE_AVX2) - using acc_vec_t = __m256i; - using bias_vec_t = __m128i; - using weight_vec_t = __m256i; - using in_vec_t = __m256i; - #define vec_zero _mm256_setzero_si256() - #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 - #define vec_hadd Simd::m256_hadd - #define vec_haddx4 Simd::m256_haddx4 -#elif defined (USE_SSSE3) - using acc_vec_t = __m128i; - using bias_vec_t = __m128i; - using weight_vec_t = __m128i; - using in_vec_t = __m128i; - #define vec_zero _mm_setzero_si128() - #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 - #define vec_hadd Simd::m128_hadd - #define vec_haddx4 Simd::m128_haddx4 -#elif defined (USE_NEON) - using acc_vec_t = int32x4_t; - using bias_vec_t = int32x4_t; - using weight_vec_t = int8x8_t; - using in_vec_t = int8x8_t; - #define vec_zero {0} - #define vec_add_dpbusd_32x2 Simd::neon_m128_add_dpbusd_epi32x2 - #define vec_hadd Simd::neon_m128_hadd - #define vec_haddx4 Simd::neon_m128_haddx4 -#endif - -#if defined (USE_SSSE3) || defined (USE_NEON) - const in_vec_t* invec = reinterpret_cast(input); - - // Perform accumulation to registers for each big block - for (IndexType bigBlock = 0; bigBlock < NumBigBlocks; ++bigBlock) - { - acc_vec_t acc[NumOutputRegs] = { vec_zero }; - - // Each big block has NumOutputRegs small blocks in each "row", one per register. - // We process two small blocks at a time to save on one addition without VNNI. - for (IndexType smallBlock = 0; smallBlock < NumSmallBlocksPerOutput; smallBlock += 2) - { - const weight_vec_t* weightvec = - reinterpret_cast( - weights - + bigBlock * BigBlockSize - + smallBlock * SmallBlockSize * NumOutputRegs); - - const in_vec_t in0 = invec[smallBlock + 0]; - const in_vec_t in1 = invec[smallBlock + 1]; - - for (IndexType k = 0; k < NumOutputRegs; ++k) - vec_add_dpbusd_32x2(acc[k], in0, weightvec[k], in1, weightvec[k + NumOutputRegs]); - } - - // Horizontally add all accumulators. - if constexpr (NumOutputRegs % 4 == 0) - { - bias_vec_t* outputvec = reinterpret_cast(output); - const bias_vec_t* biasvec = reinterpret_cast(biases); - - for (IndexType k = 0; k < NumOutputRegs; k += 4) - { - const IndexType idx = (bigBlock * NumOutputRegs + k) / 4; - outputvec[idx] = vec_haddx4(acc[k+0], acc[k+1], acc[k+2], acc[k+3], biasvec[idx]); - } - } - else - { - for (IndexType k = 0; k < NumOutputRegs; ++k) - { - const IndexType idx = (bigBlock * NumOutputRegs + k); - output[idx] = vec_hadd(acc[k], biases[idx]); - } - } - } - -# undef vec_zero -# undef vec_add_dpbusd_32x2 -# undef vec_hadd -# undef vec_haddx4 + static constexpr IndexType get_weight_index(IndexType i) { +#if defined(USE_SSSE3) + return get_weight_index_scrambled(i); #else - // Use old implementation for the other architectures. - affine_transform_non_ssse3< - InputDimensions, - PaddedInputDimensions, - OutputDimensions>(output, weights, biases, input); - -#endif - - return output; - } - - private: - using BiasType = OutputType; - using WeightType = std::int8_t; - - alignas(CacheLineSize) BiasType biases[OutputDimensions]; - alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; - }; - - template - class AffineTransform(InDims, MaxSimdWidth) < 2*64)>> { - public: - // Input/output type - // Input/output type - using InputType = std::uint8_t; - using OutputType = std::int32_t; - - // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; - static constexpr IndexType OutputDimensions = OutDims; - - static constexpr IndexType PaddedInputDimensions = - ceil_to_multiple(InputDimensions, MaxSimdWidth); - static constexpr IndexType PaddedOutputDimensions = - ceil_to_multiple(OutputDimensions, MaxSimdWidth); - - using OutputBuffer = OutputType[PaddedOutputDimensions]; - - static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen."); - -#if defined (USE_SSSE3) - static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; - static constexpr const IndexType InputSimdWidth = SimdWidth; -#endif - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0xCC03DAE4u; - hashValue += OutputDimensions; - hashValue ^= prevHash >> 1; - hashValue ^= prevHash << 31; - return hashValue; - } - - static IndexType get_weight_index_scrambled(IndexType i) - { - return - (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + - i / PaddedInputDimensions * 4 + - i % 4; - } - - static IndexType get_weight_index(IndexType i) - { -#if defined (USE_SSSE3) - return get_weight_index_scrambled(i); -#else - return i; + return i; #endif } // Read network parameters bool read_parameters(std::istream& stream) { - for (std::size_t i = 0; i < OutputDimensions; ++i) - biases[i] = read_little_endian(stream); - for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - weights[get_weight_index(i)] = read_little_endian(stream); + read_little_endian(stream, biases, OutputDimensions); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); - return !stream.fail(); + return !stream.fail(); } // Write network parameters bool write_parameters(std::ostream& stream) const { - for (std::size_t i = 0; i < OutputDimensions; ++i) - write_little_endian(stream, biases[i]); + write_little_endian(stream, biases, OutputDimensions); - for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - write_little_endian(stream, weights[get_weight_index(i)]); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); - return !stream.fail(); + return !stream.fail(); } // Forward propagation - const OutputType* propagate( - const InputType* input, OutputType* output) const { + void propagate(const InputType* input, OutputType* output) const { -#if defined (USE_AVX2) - using vec_t = __m256i; - #define vec_setzero _mm256_setzero_si256 - #define vec_set_32 _mm256_set1_epi32 - #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 - #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 - #define vec_add_dpbusd_32x4 Simd::m256_add_dpbusd_epi32x4 - #define vec_hadd Simd::m256_hadd - #define vec_haddx4 Simd::m256_haddx4 -#elif defined (USE_SSSE3) - using vec_t = __m128i; - #define vec_setzero _mm_setzero_si128 - #define vec_set_32 _mm_set1_epi32 - #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 - #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 - #define vec_add_dpbusd_32x4 Simd::m128_add_dpbusd_epi32x4 - #define vec_hadd Simd::m128_hadd - #define vec_haddx4 Simd::m128_haddx4 -#endif +#if defined(USE_SSSE3) -#if defined (USE_SSSE3) - const auto inputVector = reinterpret_cast(input); - - static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1); - - if constexpr (OutputDimensions % OutputSimdWidth == 0) - { - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 4; - constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; - - const auto input32 = reinterpret_cast(input); - const vec_t* biasvec = reinterpret_cast(biases); - vec_t acc[NumRegs]; - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = biasvec[k]; - - for (IndexType i = 0; i < NumChunks; i += 2) + if constexpr (OutputDimensions > 1) { - const vec_t in0 = vec_set_32(input32[i + 0]); - const vec_t in1 = vec_set_32(input32[i + 1]); - const auto col0 = reinterpret_cast(&weights[(i + 0) * OutputDimensions * 4]); - const auto col1 = reinterpret_cast(&weights[(i + 1) * OutputDimensions * 4]); - for (IndexType k = 0; k < NumRegs; ++k) - vec_add_dpbusd_32x2(acc[k], in0, col0[k], in1, col1[k]); + + #if defined(USE_AVX512) + using vec_t = __m512i; + #define vec_setzero _mm512_setzero_si512 + #define vec_set_32 _mm512_set1_epi32 + #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 + #define vec_hadd Simd::m512_hadd + #elif defined(USE_AVX2) + using vec_t = __m256i; + #define vec_setzero _mm256_setzero_si256 + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 + #define vec_hadd Simd::m256_hadd + #elif defined(USE_SSSE3) + using vec_t = __m128i; + #define vec_setzero _mm_setzero_si128 + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 + #define vec_hadd Simd::m128_hadd + #endif + + static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); + + static_assert(OutputDimensions % OutputSimdWidth == 0); + + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 4; + constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; + + const auto input32 = reinterpret_cast(input); + const vec_t* biasvec = reinterpret_cast(biases); + vec_t acc[NumRegs]; + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasvec[k]; + + for (IndexType i = 0; i < NumChunks; ++i) + { + const vec_t in0 = vec_set_32(input32[i]); + const auto col0 = + reinterpret_cast(&weights[i * OutputDimensions * 4]); + + for (IndexType k = 0; k < NumRegs; ++k) + vec_add_dpbusd_32(acc[k], in0, col0[k]); + } + + vec_t* outptr = reinterpret_cast(output); + for (IndexType k = 0; k < NumRegs; ++k) + outptr[k] = acc[k]; + + #undef vec_setzero + #undef vec_set_32 + #undef vec_add_dpbusd_32 + #undef vec_hadd } - - vec_t* outptr = reinterpret_cast(output); - for (IndexType k = 0; k < NumRegs; ++k) - outptr[k] = acc[k]; - } - else if constexpr (OutputDimensions == 1) - { - constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; - vec_t sum0 = vec_setzero(); - const auto row0 = reinterpret_cast(&weights[0]); - - for (int j = 0; j < (int)NumChunks; ++j) + else if constexpr (OutputDimensions == 1) { - const vec_t in = inputVector[j]; - vec_add_dpbusd_32(sum0, in, row0[j]); - } - output[0] = vec_hadd(sum0, biases[0]); - } -# undef vec_setzero -# undef vec_set_32 -# undef vec_add_dpbusd_32 -# undef vec_add_dpbusd_32x2 -# undef vec_add_dpbusd_32x4 -# undef vec_hadd -# undef vec_haddx4 + // We cannot use AVX512 for the last layer because there are only 32 inputs + // and the buffer is not padded to 64 elements. + #if defined(USE_AVX2) + using vec_t = __m256i; + #define vec_setzero _mm256_setzero_si256 + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 + #define vec_hadd Simd::m256_hadd + #elif defined(USE_SSSE3) + using vec_t = __m128i; + #define vec_setzero _mm_setzero_si128 + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 + #define vec_hadd Simd::m128_hadd + #endif + + const auto inputVector = reinterpret_cast(input); + + static constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(InputType); + + static_assert(PaddedInputDimensions % InputSimdWidth == 0); + + constexpr IndexType NumChunks = PaddedInputDimensions / InputSimdWidth; + vec_t sum0 = vec_setzero(); + const auto row0 = reinterpret_cast(&weights[0]); + + for (int j = 0; j < int(NumChunks); ++j) + { + const vec_t in = inputVector[j]; + vec_add_dpbusd_32(sum0, in, row0[j]); + } + output[0] = vec_hadd(sum0, biases[0]); + + #undef vec_setzero + #undef vec_set_32 + #undef vec_add_dpbusd_32 + #undef vec_hadd + } #else - // Use old implementation for the other architectures. - affine_transform_non_ssse3< - InputDimensions, - PaddedInputDimensions, - OutputDimensions>(output, weights, biases, input); + // Use old implementation for the other architectures. + affine_transform_non_ssse3( + output, weights, biases, input); #endif - - return output; } private: - using BiasType = OutputType; + using BiasType = OutputType; using WeightType = std::int8_t; alignas(CacheLineSize) BiasType biases[OutputDimensions]; alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; - }; +}; } // namespace Stockfish::Eval::NNUE::Layers -#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED +#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED diff --git a/src/nnue/layers/affine_transform_sparse_input.h b/src/nnue/layers/affine_transform_sparse_input.h new file mode 100644 index 00000000..0ac557ab --- /dev/null +++ b/src/nnue/layers/affine_transform_sparse_input.h @@ -0,0 +1,278 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of layer AffineTransformSparseInput of NNUE evaluation function + +#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED +#define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED + +#include +#include +#include +#include + +#include "../../bitboard.h" +#include "../nnue_common.h" +#include "affine_transform.h" +#include "simd.h" + +/* + This file contains the definition for a fully connected layer (aka affine transform) with block sparse input. +*/ + +namespace Stockfish::Eval::NNUE::Layers { + +#if (USE_SSSE3 | (USE_NEON >= 8)) +alignas(CacheLineSize) static inline const + std::array, 256> lookup_indices = []() { + std::array, 256> v{}; + for (unsigned i = 0; i < 256; ++i) + { + std::uint64_t j = i, k = 0; + while (j) + v[i][k++] = pop_lsb(j); + } + return v; + }(); + +// Find indices of nonzero numbers in an int32_t array +template +void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_out) { + #if defined(USE_SSSE3) + #if defined(USE_AVX512) + using vec_t = __m512i; + #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) + #elif defined(USE_AVX2) + using vec_t = __m256i; + #if defined(USE_VNNI) && !defined(USE_AVXVNNI) + #define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256()) + #else + #define vec_nnz(a) \ + _mm256_movemask_ps( \ + _mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) + #endif + #elif defined(USE_SSSE3) + using vec_t = __m128i; + #define vec_nnz(a) \ + _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) + #endif + using vec128_t = __m128i; + #define vec128_zero _mm_setzero_si128() + #define vec128_set_16(a) _mm_set1_epi16(a) + #define vec128_load(a) _mm_load_si128(a) + #define vec128_storeu(a, b) _mm_storeu_si128(a, b) + #define vec128_add(a, b) _mm_add_epi16(a, b) + #elif defined(USE_NEON) + using vec_t = uint32x4_t; + static const std::uint32_t Mask[4] = {1, 2, 4, 8}; + #define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask))) + using vec128_t = uint16x8_t; + #define vec128_zero vdupq_n_u16(0) + #define vec128_set_16(a) vdupq_n_u16(a) + #define vec128_load(a) vld1q_u16(reinterpret_cast(a)) + #define vec128_storeu(a, b) vst1q_u16(reinterpret_cast(a), b) + #define vec128_add(a, b) vaddq_u16(a, b) + #endif + constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t); + // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8) + constexpr IndexType ChunkSize = std::max(InputSimdWidth, 8); + constexpr IndexType NumChunks = InputDimensions / ChunkSize; + constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth; + constexpr IndexType OutputsPerChunk = ChunkSize / 8; + + const auto inputVector = reinterpret_cast(input); + IndexType count = 0; + vec128_t base = vec128_zero; + const vec128_t increment = vec128_set_16(8); + for (IndexType i = 0; i < NumChunks; ++i) + { + // bitmask of nonzero values in this chunk + unsigned nnz = 0; + for (IndexType j = 0; j < InputsPerChunk; ++j) + { + const vec_t inputChunk = inputVector[i * InputsPerChunk + j]; + nnz |= unsigned(vec_nnz(inputChunk)) << (j * InputSimdWidth); + } + for (IndexType j = 0; j < OutputsPerChunk; ++j) + { + const auto lookup = (nnz >> (j * 8)) & 0xFF; + const auto offsets = + vec128_load(reinterpret_cast(&lookup_indices[lookup])); + vec128_storeu(reinterpret_cast(out + count), vec128_add(base, offsets)); + count += popcount(lookup); + base = vec128_add(base, increment); + } + } + count_out = count; +} + #undef vec_nnz + #undef vec128_zero + #undef vec128_set_16 + #undef vec128_load + #undef vec128_storeu + #undef vec128_add +#endif + +// Sparse input implementation +template +class AffineTransformSparseInput { + public: + // Input/output type + using InputType = std::uint8_t; + using OutputType = std::int32_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = OutDims; + + static_assert(OutputDimensions % 16 == 0, + "Only implemented for OutputDimensions divisible by 16."); + + static constexpr IndexType PaddedInputDimensions = + ceil_to_multiple(InputDimensions, MaxSimdWidth); + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, MaxSimdWidth); + +#if (USE_SSSE3 | (USE_NEON >= 8)) + static constexpr IndexType ChunkSize = 4; +#else + static constexpr IndexType ChunkSize = 1; +#endif + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; + return hashValue; + } + + static constexpr IndexType get_weight_index_scrambled(IndexType i) { + return (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize + + i / PaddedInputDimensions * ChunkSize + i % ChunkSize; + } + + static constexpr IndexType get_weight_index(IndexType i) { +#if (USE_SSSE3 | (USE_NEON >= 8)) + return get_weight_index_scrambled(i); +#else + return i; +#endif + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + read_little_endian(stream, biases, OutputDimensions); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); + + return !stream.fail(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + write_little_endian(stream, biases, OutputDimensions); + + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); + + return !stream.fail(); + } + // Forward propagation + void propagate(const InputType* input, OutputType* output) const { + +#if (USE_SSSE3 | (USE_NEON >= 8)) + #if defined(USE_AVX512) + using invec_t = __m512i; + using outvec_t = __m512i; + #define vec_set_32 _mm512_set1_epi32 + #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 + #elif defined(USE_AVX2) + using invec_t = __m256i; + using outvec_t = __m256i; + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 + #elif defined(USE_SSSE3) + using invec_t = __m128i; + using outvec_t = __m128i; + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 + #elif defined(USE_NEON_DOTPROD) + using invec_t = int8x16_t; + using outvec_t = int32x4_t; + #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) + #define vec_add_dpbusd_32 Simd::dotprod_m128_add_dpbusd_epi32 + #elif defined(USE_NEON) + using invec_t = int8x16_t; + using outvec_t = int32x4_t; + #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) + #define vec_add_dpbusd_32 Simd::neon_m128_add_dpbusd_epi32 + #endif + static constexpr IndexType OutputSimdWidth = sizeof(outvec_t) / sizeof(OutputType); + + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / ChunkSize; + constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; + std::uint16_t nnz[NumChunks]; + IndexType count; + + const auto input32 = reinterpret_cast(input); + + // Find indices of nonzero 32-bit blocks + find_nnz(input32, nnz, count); + + const outvec_t* biasvec = reinterpret_cast(biases); + outvec_t acc[NumRegs]; + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasvec[k]; + + for (IndexType j = 0; j < count; ++j) + { + const auto i = nnz[j]; + const invec_t in = vec_set_32(input32[i]); + const auto col = + reinterpret_cast(&weights[i * OutputDimensions * ChunkSize]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_add_dpbusd_32(acc[k], in, col[k]); + } + + outvec_t* outptr = reinterpret_cast(output); + for (IndexType k = 0; k < NumRegs; ++k) + outptr[k] = acc[k]; + #undef vec_set_32 + #undef vec_add_dpbusd_32 +#else + // Use dense implementation for the other architectures. + affine_transform_non_ssse3( + output, weights, biases, input); +#endif + } + + private: + using BiasType = OutputType; + using WeightType = std::int8_t; + + alignas(CacheLineSize) BiasType biases[OutputDimensions]; + alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; +}; + +} // namespace Stockfish::Eval::NNUE::Layers + +#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index ffd2e3b7..813234c5 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,168 +21,148 @@ #ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED #define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED +#include +#include +#include + #include "../nnue_common.h" namespace Stockfish::Eval::NNUE::Layers { - // Clipped ReLU - template - class ClippedReLU { +// Clipped ReLU +template +class ClippedReLU { public: // Input/output type - using InputType = std::int32_t; + using InputType = std::int32_t; using OutputType = std::uint8_t; // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = InputDimensions; static constexpr IndexType PaddedOutputDimensions = - ceil_to_multiple(OutputDimensions, 32); + ceil_to_multiple(OutputDimensions, 32); using OutputBuffer = OutputType[PaddedOutputDimensions]; // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0x538D24C7u; - hashValue += prevHash; - return hashValue; + std::uint32_t hashValue = 0x538D24C7u; + hashValue += prevHash; + return hashValue; } // Read network parameters - bool read_parameters(std::istream&) { - return true; - } + bool read_parameters(std::istream&) { return true; } // Write network parameters - bool write_parameters(std::ostream&) const { - return true; - } + bool write_parameters(std::ostream&) const { return true; } // Forward propagation - const OutputType* propagate( - const InputType* input, OutputType* output) const { + void propagate(const InputType* input, OutputType* output) const { - #if defined(USE_AVX2) - if constexpr (InputDimensions % SimdWidth == 0) { +#if defined(USE_AVX2) + if constexpr (InputDimensions % SimdWidth == 0) + { + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + const __m256i Zero = _mm256_setzero_si256(); + const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m256i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) + { + const __m256i words0 = + _mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 0]), + _mm256_load_si256(&in[i * 4 + 1])), + WeightScaleBits); + const __m256i words1 = + _mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 2]), + _mm256_load_si256(&in[i * 4 + 3])), + WeightScaleBits); + _mm256_store_si256( + &out[i], _mm256_permutevar8x32_epi32( + _mm256_max_epi8(_mm256_packs_epi16(words0, words1), Zero), Offsets)); + } + } + else + { + constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); + const __m128i Zero = _mm_setzero_si128(); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) + { + const __m128i words0 = _mm_srai_epi16( + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), + WeightScaleBits); + const __m128i words1 = _mm_srai_epi16( + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), + WeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero)); + } + } + constexpr IndexType Start = InputDimensions % SimdWidth == 0 + ? InputDimensions / SimdWidth * SimdWidth + : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2); + +#elif defined(USE_SSE2) constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const __m256i Zero = _mm256_setzero_si256(); - const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m256i*>(output); - for (IndexType i = 0; i < NumChunks; ++i) { - const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 0]), - _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits); - const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 2]), - _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits); - _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( - _mm256_packs_epi16(words0, words1), Zero), Offsets)); - } - } else { - constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); + + #ifdef USE_SSE41 const __m128i Zero = _mm_setzero_si128(); - const auto in = reinterpret_cast(input); + #else + const __m128i k0x80s = _mm_set1_epi8(-128); + #endif + + const auto in = reinterpret_cast(input); const auto out = reinterpret_cast<__m128i*>(output); - for (IndexType i = 0; i < NumChunks; ++i) { - const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 0]), - _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); - const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 2]), - _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); - const __m128i packedbytes = _mm_packs_epi16(words0, words1); - _mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero)); + for (IndexType i = 0; i < NumChunks; ++i) + { + const __m128i words0 = _mm_srai_epi16( + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), + WeightScaleBits); + const __m128i words1 = _mm_srai_epi16( + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), + WeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], + + #ifdef USE_SSE41 + _mm_max_epi8(packedbytes, Zero) + #else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + #endif + + ); } - } - constexpr IndexType Start = - InputDimensions % SimdWidth == 0 - ? InputDimensions / SimdWidth * SimdWidth - : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2); + constexpr IndexType Start = NumChunks * SimdWidth; - #elif defined(USE_SSE2) - constexpr IndexType NumChunks = InputDimensions / SimdWidth; +#elif defined(USE_NEON) + constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); + const int8x8_t Zero = {0}; + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast(output); + for (IndexType i = 0; i < NumChunks; ++i) + { + int16x8_t shifted; + const auto pack = reinterpret_cast(&shifted); + pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits); + pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits); + out[i] = vmax_s8(vqmovn_s16(shifted), Zero); + } + constexpr IndexType Start = NumChunks * (SimdWidth / 2); +#else + constexpr IndexType Start = 0; +#endif - #ifdef USE_SSE41 - const __m128i Zero = _mm_setzero_si128(); - #else - const __m128i k0x80s = _mm_set1_epi8(-128); - #endif - - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m128i*>(output); - for (IndexType i = 0; i < NumChunks; ++i) { - const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 0]), - _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); - const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 2]), - _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); - const __m128i packedbytes = _mm_packs_epi16(words0, words1); - _mm_store_si128(&out[i], - - #ifdef USE_SSE41 - _mm_max_epi8(packedbytes, Zero) - #else - _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) - #endif - - ); - } - constexpr IndexType Start = NumChunks * SimdWidth; - - #elif defined(USE_MMX) - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const __m64 k0x80s = _mm_set1_pi8(-128); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m64*>(output); - for (IndexType i = 0; i < NumChunks; ++i) { - const __m64 words0 = _mm_srai_pi16( - _mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]), - WeightScaleBits); - const __m64 words1 = _mm_srai_pi16( - _mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]), - WeightScaleBits); - const __m64 packedbytes = _mm_packs_pi16(words0, words1); - out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); - } - _mm_empty(); - constexpr IndexType Start = NumChunks * SimdWidth; - - #elif defined(USE_NEON) - constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); - const int8x8_t Zero = {0}; - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast(output); - for (IndexType i = 0; i < NumChunks; ++i) { - int16x8_t shifted; - const auto pack = reinterpret_cast(&shifted); - pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits); - pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits); - out[i] = vmax_s8(vqmovn_s16(shifted), Zero); - } - constexpr IndexType Start = NumChunks * (SimdWidth / 2); - #else - constexpr IndexType Start = 0; - #endif - - for (IndexType i = Start; i < InputDimensions; ++i) { - output[i] = static_cast( - std::max(0, std::min(127, input[i] >> WeightScaleBits))); - } - - // Affine transform layers expect that there is at least - // ceil_to_multiple(OutputDimensions, 32) initialized values. - // We cannot do this in the affine transform because it requires - // preallocating space here. - for (IndexType i = OutputDimensions; i < PaddedOutputDimensions; ++i) { - output[i] = 0; - } - - return output; + for (IndexType i = Start; i < InputDimensions; ++i) + { + output[i] = static_cast(std::clamp(input[i] >> WeightScaleBits, 0, 127)); + } } - }; +}; } // namespace Stockfish::Eval::NNUE::Layers -#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED +#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED diff --git a/src/nnue/layers/simd.h b/src/nnue/layers/simd.h new file mode 100644 index 00000000..cec41474 --- /dev/null +++ b/src/nnue/layers/simd.h @@ -0,0 +1,167 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef STOCKFISH_SIMD_H_INCLUDED +#define STOCKFISH_SIMD_H_INCLUDED + +#if defined(USE_AVX2) + #include + +#elif defined(USE_SSE41) + #include + +#elif defined(USE_SSSE3) + #include + +#elif defined(USE_SSE2) + #include + +#elif defined(USE_NEON) + #include +#endif + +namespace Stockfish::Simd { + +#if defined(USE_AVX512) + +[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) { + return _mm512_reduce_add_epi32(sum) + bias; +} + +/* + Parameters: + sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]] + sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]] + sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]] + sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]] + + Returns: + ret = [ + reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]), + reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]), + reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), reduce_add_epi32(zmm3.i128[2]), + reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3]) + ] + */ +[[maybe_unused]] static __m512i +m512_hadd128x16_interleave(__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) { + + __m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1); + __m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1); + + __m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3); + __m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3); + + __m512i sum01 = _mm512_add_epi32(sum01a, sum01b); + __m512i sum23 = _mm512_add_epi32(sum23a, sum23b); + + __m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23); + __m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23); + + return _mm512_add_epi32(sum0123a, sum0123b); +} + +[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) { + + #if defined(USE_VNNI) + acc = _mm512_dpbusd_epi32(acc, a, b); + #else + __m512i product0 = _mm512_maddubs_epi16(a, b); + product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); + acc = _mm512_add_epi32(acc, product0); + #endif +} + +#endif + +#if defined(USE_AVX2) + +[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) { + __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); + return _mm_cvtsi128_si32(sum128) + bias; +} + +[[maybe_unused]] static void m256_add_dpbusd_epi32(__m256i& acc, __m256i a, __m256i b) { + + #if defined(USE_VNNI) + acc = _mm256_dpbusd_epi32(acc, a, b); + #else + __m256i product0 = _mm256_maddubs_epi16(a, b); + product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); + acc = _mm256_add_epi32(acc, product0); + #endif +} + +#endif + +#if defined(USE_SSSE3) + +[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) { + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB + return _mm_cvtsi128_si32(sum) + bias; +} + +[[maybe_unused]] static void m128_add_dpbusd_epi32(__m128i& acc, __m128i a, __m128i b) { + + __m128i product0 = _mm_maddubs_epi16(a, b); + product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); + acc = _mm_add_epi32(acc, product0); +} + +#endif + +#if defined(USE_NEON_DOTPROD) + +[[maybe_unused]] static void +dotprod_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) { + + acc = vdotq_s32(acc, a, b); +} +#endif + +#if defined(USE_NEON) + +[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { + #if USE_NEON >= 8 + return vaddvq_s32(s); + #else + return s[0] + s[1] + s[2] + s[3]; + #endif +} + +[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) { + return neon_m128_reduce_add_epi32(sum) + bias; +} + +#endif + +#if USE_NEON >= 8 +[[maybe_unused]] static void neon_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) { + + int16x8_t product0 = vmull_s8(vget_low_s8(a), vget_low_s8(b)); + int16x8_t product1 = vmull_high_s8(a, b); + int16x8_t sum = vpaddq_s16(product0, product1); + acc = vpadalq_s16(acc, sum); +} +#endif +} + +#endif // STOCKFISH_SIMD_H_INCLUDED diff --git a/src/nnue/layers/sqr_clipped_relu.h b/src/nnue/layers/sqr_clipped_relu.h new file mode 100644 index 00000000..9c20df9d --- /dev/null +++ b/src/nnue/layers/sqr_clipped_relu.h @@ -0,0 +1,103 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of layer ClippedReLU of NNUE evaluation function + +#ifndef NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED +#define NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED + +#include +#include +#include + +#include "../nnue_common.h" + +namespace Stockfish::Eval::NNUE::Layers { + +// Clipped ReLU +template +class SqrClippedReLU { + public: + // Input/output type + using InputType = std::int32_t; + using OutputType = std::uint8_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = InputDimensions; + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, 32); + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { + std::uint32_t hashValue = 0x538D24C7u; + hashValue += prevHash; + return hashValue; + } + + // Read network parameters + bool read_parameters(std::istream&) { return true; } + + // Write network parameters + bool write_parameters(std::ostream&) const { return true; } + + // Forward propagation + void propagate(const InputType* input, OutputType* output) const { + +#if defined(USE_SSE2) + constexpr IndexType NumChunks = InputDimensions / 16; + + static_assert(WeightScaleBits == 6); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) + { + __m128i words0 = + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])); + __m128i words1 = + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])); + + // We shift by WeightScaleBits * 2 = 12 and divide by 128 + // which is an additional shift-right of 7, meaning 19 in total. + // MulHi strips the lower 16 bits so we need to shift out 3 more to match. + words0 = _mm_srli_epi16(_mm_mulhi_epi16(words0, words0), 3); + words1 = _mm_srli_epi16(_mm_mulhi_epi16(words1, words1), 3); + + _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); + } + constexpr IndexType Start = NumChunks * 16; + +#else + constexpr IndexType Start = 0; +#endif + + for (IndexType i = Start; i < InputDimensions; ++i) + { + output[i] = static_cast( + // Really should be /127 but we need to make it fast so we right-shift + // by an extra 7 bits instead. Needs to be accounted for in the trainer. + std::min(127ll, ((long long) (input[i]) * input[i]) >> (2 * WeightScaleBits + 7))); + } + } +}; + +} // namespace Stockfish::Eval::NNUE::Layers + +#endif // NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp new file mode 100644 index 00000000..de2c7eca --- /dev/null +++ b/src/nnue/network.cpp @@ -0,0 +1,448 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "network.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../evaluate.h" +#include "../incbin/incbin.h" +#include "../misc.h" +#include "../position.h" +#include "../types.h" +#include "nnue_architecture.h" +#include "nnue_common.h" +#include "nnue_misc.h" + +namespace { +// Macro to embed the default efficiently updatable neural network (NNUE) file +// data in the engine binary (using incbin.h, by Dale Weiler). +// This macro invocation will declare the following three variables +// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data +// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end +// const unsigned int gEmbeddedNNUESize; // the size of the embedded file +// Note that this does not work in Microsoft Visual Studio. +#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) +INCBIN(EmbeddedNNUEBig, EvalFileDefaultNameBig); +INCBIN(EmbeddedNNUESmall, EvalFileDefaultNameSmall); +#else +const unsigned char gEmbeddedNNUEBigData[1] = {0x0}; +const unsigned char* const gEmbeddedNNUEBigEnd = &gEmbeddedNNUEBigData[1]; +const unsigned int gEmbeddedNNUEBigSize = 1; +const unsigned char gEmbeddedNNUESmallData[1] = {0x0}; +const unsigned char* const gEmbeddedNNUESmallEnd = &gEmbeddedNNUESmallData[1]; +const unsigned int gEmbeddedNNUESmallSize = 1; +#endif + +struct EmbeddedNNUE { + EmbeddedNNUE(const unsigned char* embeddedData, + const unsigned char* embeddedEnd, + const unsigned int embeddedSize) : + data(embeddedData), + end(embeddedEnd), + size(embeddedSize) {} + const unsigned char* data; + const unsigned char* end; + const unsigned int size; +}; + +using namespace Stockfish::Eval::NNUE; + +EmbeddedNNUE get_embedded(EmbeddedNNUEType type) { + if (type == EmbeddedNNUEType::BIG) + return EmbeddedNNUE(gEmbeddedNNUEBigData, gEmbeddedNNUEBigEnd, gEmbeddedNNUEBigSize); + else + return EmbeddedNNUE(gEmbeddedNNUESmallData, gEmbeddedNNUESmallEnd, gEmbeddedNNUESmallSize); +} + +} + + +namespace Stockfish::Eval::NNUE { + + +namespace Detail { + +// Initialize the evaluation function parameters +template +void initialize(AlignedPtr& pointer) { + + pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); +} + +template +void initialize(LargePagePtr& pointer) { + + static_assert(alignof(T) <= 4096, + "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); +} + +// Read evaluation function parameters +template +bool read_parameters(std::istream& stream, T& reference) { + + std::uint32_t header; + header = read_little_endian(stream); + if (!stream || header != T::get_hash_value()) + return false; + return reference.read_parameters(stream); +} + +// Write evaluation function parameters +template +bool write_parameters(std::ostream& stream, const T& reference) { + + write_little_endian(stream, T::get_hash_value()); + return reference.write_parameters(stream); +} + +} // namespace Detail + + +template +void Network::load(const std::string& rootDirectory, std::string evalfilePath) { +#if defined(DEFAULT_NNUE_DIRECTORY) + std::vector dirs = {"", "", rootDirectory, + stringify(DEFAULT_NNUE_DIRECTORY)}; +#else + std::vector dirs = {"", "", rootDirectory}; +#endif + + if (evalfilePath.empty()) + evalfilePath = evalFile.defaultName; + + for (const auto& directory : dirs) + { + if (evalFile.current != evalfilePath) + { + if (directory != "") + { + load_user_net(directory, evalfilePath); + } + + if (directory == "" && evalfilePath == evalFile.defaultName) + { + load_internal(); + } + } + } +} + + +template +bool Network::save(const std::optional& filename) const { + std::string actualFilename; + std::string msg; + + if (filename.has_value()) + actualFilename = filename.value(); + else + { + if (evalFile.current != evalFile.defaultName) + { + msg = "Failed to export a net. " + "A non-embedded net can only be saved if the filename is specified"; + + sync_cout << msg << sync_endl; + return false; + } + + actualFilename = evalFile.defaultName; + } + + std::ofstream stream(actualFilename, std::ios_base::binary); + bool saved = save(stream, evalFile.current, evalFile.netDescription); + + msg = saved ? "Network saved successfully to " + actualFilename : "Failed to export a net"; + + sync_cout << msg << sync_endl; + return saved; +} + + +template +Value Network::evaluate(const Position& pos, + AccumulatorCaches::Cache* cache, + bool adjusted, + int* complexity) const { + // We manually align the arrays on the stack because with gcc < 9.3 + // overaligning stack variables with alignas() doesn't work correctly. + + constexpr uint64_t alignment = CacheLineSize; + constexpr int delta = 24; + +#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) + TransformedFeatureType + transformedFeaturesUnaligned[FeatureTransformer::BufferSize + + alignment / sizeof(TransformedFeatureType)]; + + auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); +#else + alignas(alignment) TransformedFeatureType + transformedFeatures[FeatureTransformer::BufferSize]; +#endif + + ASSERT_ALIGNED(transformedFeatures, alignment); + + const int bucket = (pos.count() - 1) / 4; + const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket); + const auto positional = network[bucket]->propagate(transformedFeatures); + + if (complexity) + *complexity = std::abs(psqt - positional) / OutputScale; + + // Give more value to positional evaluation when adjusted flag is set + if (adjusted) + return static_cast(((1024 - delta) * psqt + (1024 + delta) * positional) + / (1024 * OutputScale)); + else + return static_cast((psqt + positional) / OutputScale); +} + + +template +void Network::verify(std::string evalfilePath) const { + if (evalfilePath.empty()) + evalfilePath = evalFile.defaultName; + + if (evalFile.current != evalfilePath) + { + std::string msg1 = + "Network evaluation parameters compatible with the engine must be available."; + std::string msg2 = "The network file " + evalfilePath + " was not loaded successfully."; + std::string msg3 = "The UCI option EvalFile might need to specify the full path, " + "including the directory name, to the network file."; + std::string msg4 = "The default net can be downloaded from: " + "https://tests.stockfishchess.org/api/nn/" + + evalFile.defaultName; + std::string msg5 = "The engine will be terminated now."; + + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg4 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; + exit(EXIT_FAILURE); + } + + size_t size = sizeof(*featureTransformer) + sizeof(*network) * LayerStacks; + sync_cout << "info string NNUE evaluation using " << evalfilePath << " (" + << size / (1024 * 1024) << "MiB, (" << featureTransformer->InputDimensions << ", " + << network[0]->TransformedFeatureDimensions << ", " << network[0]->FC_0_OUTPUTS + << ", " << network[0]->FC_1_OUTPUTS << ", 1))" << sync_endl; +} + + +template +void Network::hint_common_access( + const Position& pos, AccumulatorCaches::Cache* cache) const { + featureTransformer->hint_common_access(pos, cache); +} + +template +NnueEvalTrace +Network::trace_evaluate(const Position& pos, + AccumulatorCaches::Cache* cache) const { + // We manually align the arrays on the stack because with gcc < 9.3 + // overaligning stack variables with alignas() doesn't work correctly. + constexpr uint64_t alignment = CacheLineSize; + +#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) + TransformedFeatureType + transformedFeaturesUnaligned[FeatureTransformer::BufferSize + + alignment / sizeof(TransformedFeatureType)]; + + auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); +#else + alignas(alignment) TransformedFeatureType + transformedFeatures[FeatureTransformer::BufferSize]; +#endif + + ASSERT_ALIGNED(transformedFeatures, alignment); + + NnueEvalTrace t{}; + t.correctBucket = (pos.count() - 1) / 4; + for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) + { + const auto materialist = + featureTransformer->transform(pos, cache, transformedFeatures, bucket); + const auto positional = network[bucket]->propagate(transformedFeatures); + + t.psqt[bucket] = static_cast(materialist / OutputScale); + t.positional[bucket] = static_cast(positional / OutputScale); + } + + return t; +} + + +template +void Network::load_user_net(const std::string& dir, + const std::string& evalfilePath) { + std::ifstream stream(dir + evalfilePath, std::ios::binary); + auto description = load(stream); + + if (description.has_value()) + { + evalFile.current = evalfilePath; + evalFile.netDescription = description.value(); + } +} + + +template +void Network::load_internal() { + // C++ way to prepare a buffer for a memory stream + class MemoryBuffer: public std::basic_streambuf { + public: + MemoryBuffer(char* p, size_t n) { + setg(p, p, p + n); + setp(p, p + n); + } + }; + + const auto embedded = get_embedded(embeddedType); + + MemoryBuffer buffer(const_cast(reinterpret_cast(embedded.data)), + size_t(embedded.size)); + + std::istream stream(&buffer); + auto description = load(stream); + + if (description.has_value()) + { + evalFile.current = evalFile.defaultName; + evalFile.netDescription = description.value(); + } +} + + +template +void Network::initialize() { + Detail::initialize(featureTransformer); + for (std::size_t i = 0; i < LayerStacks; ++i) + Detail::initialize(network[i]); +} + + +template +bool Network::save(std::ostream& stream, + const std::string& name, + const std::string& netDescription) const { + if (name.empty() || name == "None") + return false; + + return write_parameters(stream, netDescription); +} + + +template +std::optional Network::load(std::istream& stream) { + initialize(); + std::string description; + + return read_parameters(stream, description) ? std::make_optional(description) : std::nullopt; +} + + +// Read network header +template +bool Network::read_header(std::istream& stream, + std::uint32_t* hashValue, + std::string* desc) const { + std::uint32_t version, size; + + version = read_little_endian(stream); + *hashValue = read_little_endian(stream); + size = read_little_endian(stream); + if (!stream || version != Version) + return false; + desc->resize(size); + stream.read(&(*desc)[0], size); + return !stream.fail(); +} + + +// Write network header +template +bool Network::write_header(std::ostream& stream, + std::uint32_t hashValue, + const std::string& desc) const { + write_little_endian(stream, Version); + write_little_endian(stream, hashValue); + write_little_endian(stream, std::uint32_t(desc.size())); + stream.write(&desc[0], desc.size()); + return !stream.fail(); +} + + +template +bool Network::read_parameters(std::istream& stream, + std::string& netDescription) const { + std::uint32_t hashValue; + if (!read_header(stream, &hashValue, &netDescription)) + return false; + if (hashValue != Network::hash) + return false; + if (!Detail::read_parameters(stream, *featureTransformer)) + return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + { + if (!Detail::read_parameters(stream, *(network[i]))) + return false; + } + return stream && stream.peek() == std::ios::traits_type::eof(); +} + + +template +bool Network::write_parameters(std::ostream& stream, + const std::string& netDescription) const { + if (!write_header(stream, Network::hash, netDescription)) + return false; + if (!Detail::write_parameters(stream, *featureTransformer)) + return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + { + if (!Detail::write_parameters(stream, *(network[i]))) + return false; + } + return bool(stream); +} + +// Explicit template instantiation + +template class Network< + NetworkArchitecture, + FeatureTransformer>; + +template class Network< + NetworkArchitecture, + FeatureTransformer>; + +} // namespace Stockfish::Eval::NNUE diff --git a/src/nnue/network.h b/src/nnue/network.h new file mode 100644 index 00000000..23f56663 --- /dev/null +++ b/src/nnue/network.h @@ -0,0 +1,126 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef NETWORK_H_INCLUDED +#define NETWORK_H_INCLUDED + +#include +#include +#include +#include +#include + +#include "../misc.h" +#include "../position.h" +#include "../types.h" +#include "nnue_architecture.h" +#include "nnue_feature_transformer.h" +#include "nnue_misc.h" +#include "nnue_accumulator.h" + +namespace Stockfish::Eval::NNUE { + +enum class EmbeddedNNUEType { + BIG, + SMALL, +}; + + +template +class Network { + static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions; + + public: + Network(EvalFile file, EmbeddedNNUEType type) : + evalFile(file), + embeddedType(type) {} + + void load(const std::string& rootDirectory, std::string evalfilePath); + bool save(const std::optional& filename) const; + + Value evaluate(const Position& pos, + AccumulatorCaches::Cache* cache, + bool adjusted = false, + int* complexity = nullptr) const; + + + void hint_common_access(const Position& pos, + AccumulatorCaches::Cache* cache) const; + + void verify(std::string evalfilePath) const; + NnueEvalTrace trace_evaluate(const Position& pos, + AccumulatorCaches::Cache* cache) const; + + private: + void load_user_net(const std::string&, const std::string&); + void load_internal(); + + void initialize(); + + bool save(std::ostream&, const std::string&, const std::string&) const; + std::optional load(std::istream&); + + bool read_header(std::istream&, std::uint32_t*, std::string*) const; + bool write_header(std::ostream&, std::uint32_t, const std::string&) const; + + bool read_parameters(std::istream&, std::string&) const; + bool write_parameters(std::ostream&, const std::string&) const; + + // Input feature converter + LargePagePtr featureTransformer; + + // Evaluation function + AlignedPtr network[LayerStacks]; + + EvalFile evalFile; + EmbeddedNNUEType embeddedType; + + // Hash value of evaluation function structure + static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value(); + + template + friend struct AccumulatorCaches::Cache; +}; + +// Definitions of the network types +using SmallFeatureTransformer = + FeatureTransformer; +using SmallNetworkArchitecture = + NetworkArchitecture; + +using BigFeatureTransformer = + FeatureTransformer; +using BigNetworkArchitecture = NetworkArchitecture; + +using NetworkBig = Network; +using NetworkSmall = Network; + + +struct Networks { + Networks(NetworkBig&& nB, NetworkSmall&& nS) : + big(std::move(nB)), + small(std::move(nS)) {} + + NetworkBig big; + NetworkSmall small; +}; + + +} // namespace Stockfish + +#endif diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 600483b5..b8dcf1e4 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,17 +21,85 @@ #ifndef NNUE_ACCUMULATOR_H_INCLUDED #define NNUE_ACCUMULATOR_H_INCLUDED +#include + #include "nnue_architecture.h" +#include "nnue_common.h" namespace Stockfish::Eval::NNUE { - // Class that holds the result of affine transformation of input features - struct alignas(CacheLineSize) Accumulator { - std::int16_t accumulation[2][TransformedFeatureDimensions]; - std::int32_t psqtAccumulation[2][PSQTBuckets]; - bool computed[2]; - }; +using BiasType = std::int16_t; +using PSQTWeightType = std::int32_t; +using IndexType = std::uint32_t; + +// Class that holds the result of affine transformation of input features +template +struct alignas(CacheLineSize) Accumulator { + std::int16_t accumulation[COLOR_NB][Size]; + std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets]; + bool computed[COLOR_NB]; +}; + + +// AccumulatorCaches struct provides per-thread accumulator caches, where each +// cache contains multiple entries for each of the possible king squares. +// When the accumulator needs to be refreshed, the cached entry is used to more +// efficiently update the accumulator, instead of rebuilding it from scratch. +// This idea, was first described by Luecx (author of Koivisto) and +// is commonly referred to as "Finny Tables". +struct AccumulatorCaches { + + template + AccumulatorCaches(const Networks& networks) { + clear(networks); + } + + template + struct alignas(CacheLineSize) Cache { + + struct alignas(CacheLineSize) Entry { + BiasType accumulation[Size]; + PSQTWeightType psqtAccumulation[PSQTBuckets]; + Bitboard byColorBB[COLOR_NB]; + Bitboard byTypeBB[PIECE_TYPE_NB]; + + // To initialize a refresh entry, we set all its bitboards empty, + // so we put the biases in the accumulation, without any weights on top + void clear(const BiasType* biases) { + + std::memcpy(accumulation, biases, sizeof(accumulation)); + std::memset((uint8_t*) this + offsetof(Entry, psqtAccumulation), 0, + sizeof(Entry) - offsetof(Entry, psqtAccumulation)); + } + }; + + template + void clear(const Network& network) { + for (auto& entries1D : entries) + for (auto& entry : entries1D) + entry.clear(network.featureTransformer->biases); + } + + void clear(const BiasType* biases) { + for (auto& entry : entries) + entry.clear(biases); + } + + std::array& operator[](Square sq) { return entries[sq]; } + + std::array, SQUARE_NB> entries; + }; + + template + void clear(const Networks& networks) { + big.clear(networks.big); + small.clear(networks.small); + } + + Cache big; + Cache small; +}; } // namespace Stockfish::Eval::NNUE -#endif // NNUE_ACCUMULATOR_H_INCLUDED +#endif // NNUE_ACCUMULATOR_H_INCLUDED diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index 725b40fb..7f73f87f 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,14 +21,16 @@ #ifndef NNUE_ARCHITECTURE_H_INCLUDED #define NNUE_ARCHITECTURE_H_INCLUDED -#include "nnue_common.h" +#include +#include +#include #include "features/half_ka_v2_hm.h" - #include "layers/affine_transform.h" +#include "layers/affine_transform_sparse_input.h" #include "layers/clipped_relu.h" - -#include "../misc.h" +#include "layers/sqr_clipped_relu.h" +#include "nnue_common.h" namespace Stockfish::Eval::NNUE { @@ -36,96 +38,100 @@ namespace Stockfish::Eval::NNUE { using FeatureSet = Features::HalfKAv2_hm; // Number of input feature dimensions after conversion -constexpr IndexType TransformedFeatureDimensions = 1024; +constexpr IndexType TransformedFeatureDimensionsBig = 3072; +constexpr int L2Big = 15; +constexpr int L3Big = 32; + +constexpr IndexType TransformedFeatureDimensionsSmall = 128; +constexpr int L2Small = 15; +constexpr int L3Small = 32; + constexpr IndexType PSQTBuckets = 8; constexpr IndexType LayerStacks = 8; -struct Network -{ - static constexpr int FC_0_OUTPUTS = 15; - static constexpr int FC_1_OUTPUTS = 32; +template +struct NetworkArchitecture { + static constexpr IndexType TransformedFeatureDimensions = L1; + static constexpr int FC_0_OUTPUTS = L2; + static constexpr int FC_1_OUTPUTS = L3; - Layers::AffineTransform fc_0; - Layers::ClippedReLU ac_0; - Layers::AffineTransform fc_1; - Layers::ClippedReLU ac_1; - Layers::AffineTransform fc_2; + Layers::AffineTransformSparseInput fc_0; + Layers::SqrClippedReLU ac_sqr_0; + Layers::ClippedReLU ac_0; + Layers::AffineTransform fc_1; + Layers::ClippedReLU ac_1; + Layers::AffineTransform fc_2; - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value() { - // input slice hash - std::uint32_t hashValue = 0xEC42E90Du; - hashValue ^= TransformedFeatureDimensions * 2; + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value() { + // input slice hash + std::uint32_t hashValue = 0xEC42E90Du; + hashValue ^= TransformedFeatureDimensions * 2; - hashValue = decltype(fc_0)::get_hash_value(hashValue); - hashValue = decltype(ac_0)::get_hash_value(hashValue); - hashValue = decltype(fc_1)::get_hash_value(hashValue); - hashValue = decltype(ac_1)::get_hash_value(hashValue); - hashValue = decltype(fc_2)::get_hash_value(hashValue); + hashValue = decltype(fc_0)::get_hash_value(hashValue); + hashValue = decltype(ac_0)::get_hash_value(hashValue); + hashValue = decltype(fc_1)::get_hash_value(hashValue); + hashValue = decltype(ac_1)::get_hash_value(hashValue); + hashValue = decltype(fc_2)::get_hash_value(hashValue); - return hashValue; - } + return hashValue; + } - // Read network parameters - bool read_parameters(std::istream& stream) { - if (!fc_0.read_parameters(stream)) return false; - if (!ac_0.read_parameters(stream)) return false; - if (!fc_1.read_parameters(stream)) return false; - if (!ac_1.read_parameters(stream)) return false; - if (!fc_2.read_parameters(stream)) return false; - return true; - } + // Read network parameters + bool read_parameters(std::istream& stream) { + return fc_0.read_parameters(stream) && ac_0.read_parameters(stream) + && fc_1.read_parameters(stream) && ac_1.read_parameters(stream) + && fc_2.read_parameters(stream); + } - // Read network parameters - bool write_parameters(std::ostream& stream) const { - if (!fc_0.write_parameters(stream)) return false; - if (!ac_0.write_parameters(stream)) return false; - if (!fc_1.write_parameters(stream)) return false; - if (!ac_1.write_parameters(stream)) return false; - if (!fc_2.write_parameters(stream)) return false; - return true; - } + // Write network parameters + bool write_parameters(std::ostream& stream) const { + return fc_0.write_parameters(stream) && ac_0.write_parameters(stream) + && fc_1.write_parameters(stream) && ac_1.write_parameters(stream) + && fc_2.write_parameters(stream); + } - std::int32_t propagate(const TransformedFeatureType* transformedFeatures) - { - constexpr uint64_t alignment = CacheLineSize; + std::int32_t propagate(const TransformedFeatureType* transformedFeatures) { + struct alignas(CacheLineSize) Buffer { + alignas(CacheLineSize) typename decltype(fc_0)::OutputBuffer fc_0_out; + alignas(CacheLineSize) typename decltype(ac_sqr_0)::OutputType + ac_sqr_0_out[ceil_to_multiple(FC_0_OUTPUTS * 2, 32)]; + alignas(CacheLineSize) typename decltype(ac_0)::OutputBuffer ac_0_out; + alignas(CacheLineSize) typename decltype(fc_1)::OutputBuffer fc_1_out; + alignas(CacheLineSize) typename decltype(ac_1)::OutputBuffer ac_1_out; + alignas(CacheLineSize) typename decltype(fc_2)::OutputBuffer fc_2_out; - struct Buffer - { - alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out; - alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out; - alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out; - alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out; - alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out; - }; + Buffer() { std::memset(this, 0, sizeof(*this)); } + }; -#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) - char bufferRaw[sizeof(Buffer) + alignment]; - char* bufferRawAligned = align_ptr_up(&bufferRaw[0]); - Buffer& buffer = *(new (bufferRawAligned) Buffer); +#if defined(__clang__) && (__APPLE__) + // workaround for a bug reported with xcode 12 + static thread_local auto tlsBuffer = std::make_unique(); + // Access TLS only once, cache result. + Buffer& buffer = *tlsBuffer; #else - alignas(alignment) Buffer buffer; + alignas(CacheLineSize) static thread_local Buffer buffer; #endif - fc_0.propagate(transformedFeatures, buffer.fc_0_out); - ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out); - fc_1.propagate(buffer.ac_0_out, buffer.fc_1_out); - ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out); - fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out); + fc_0.propagate(transformedFeatures, buffer.fc_0_out); + ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out); + ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out); + std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out, + FC_0_OUTPUTS * sizeof(typename decltype(ac_0)::OutputType)); + fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out); + ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out); + fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out); - // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1< +#include +#include #include #include +#include -#include "../misc.h" // for IsLittleEndian +#include "../misc.h" #if defined(USE_AVX2) -#include + #include #elif defined(USE_SSE41) -#include + #include #elif defined(USE_SSSE3) -#include + #include #elif defined(USE_SSE2) -#include - -#elif defined(USE_MMX) -#include + #include #elif defined(USE_NEON) -#include + #include #endif namespace Stockfish::Eval::NNUE { - // Version of the evaluation file - constexpr std::uint32_t Version = 0x7AF32F20u; +// Version of the evaluation file +constexpr std::uint32_t Version = 0x7AF32F20u; - // Constant used in evaluation value calculation - constexpr int OutputScale = 16; - constexpr int WeightScaleBits = 6; +// Constant used in evaluation value calculation +constexpr int OutputScale = 16; +constexpr int WeightScaleBits = 6; - // Size of cache line (in bytes) - constexpr std::size_t CacheLineSize = 64; +// Size of cache line (in bytes) +constexpr std::size_t CacheLineSize = 64; - // SIMD width (in bytes) - #if defined(USE_AVX2) - constexpr std::size_t SimdWidth = 32; +constexpr const char Leb128MagicString[] = "COMPRESSED_LEB128"; +constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1; - #elif defined(USE_SSE2) - constexpr std::size_t SimdWidth = 16; +// SIMD width (in bytes) +#if defined(USE_AVX2) +constexpr std::size_t SimdWidth = 32; - #elif defined(USE_MMX) - constexpr std::size_t SimdWidth = 8; +#elif defined(USE_SSE2) +constexpr std::size_t SimdWidth = 16; - #elif defined(USE_NEON) - constexpr std::size_t SimdWidth = 16; - #endif +#elif defined(USE_NEON) +constexpr std::size_t SimdWidth = 16; +#endif - constexpr std::size_t MaxSimdWidth = 32; +constexpr std::size_t MaxSimdWidth = 32; - // Type of input feature after conversion - using TransformedFeatureType = std::uint8_t; - using IndexType = std::uint32_t; +// Type of input feature after conversion +using TransformedFeatureType = std::uint8_t; +using IndexType = std::uint32_t; - // Round n up to be a multiple of base - template - constexpr IntType ceil_to_multiple(IntType n, IntType base) { - return (n + base - 1) / base * base; - } +// Round n up to be a multiple of base +template +constexpr IntType ceil_to_multiple(IntType n, IntType base) { + return (n + base - 1) / base * base; +} - // read_little_endian() is our utility to read an integer (signed or unsigned, any size) - // from a stream in little-endian order. We swap the byte order after the read if - // necessary to return a result with the byte ordering of the compiling machine. - template - inline IntType read_little_endian(std::istream& stream) { - IntType result; - if (IsLittleEndian) - stream.read(reinterpret_cast(&result), sizeof(IntType)); - else - { - std::uint8_t u[sizeof(IntType)]; - typename std::make_unsigned::type v = 0; +// Utility to read an integer (signed or unsigned, any size) +// from a stream in little-endian order. We swap the byte order after the read if +// necessary to return a result with the byte ordering of the compiling machine. +template +inline IntType read_little_endian(std::istream& stream) { + IntType result; - stream.read(reinterpret_cast(u), sizeof(IntType)); - for (std::size_t i = 0; i < sizeof(IntType); ++i) - v = (v << 8) | u[sizeof(IntType) - i - 1]; + if (IsLittleEndian) + stream.read(reinterpret_cast(&result), sizeof(IntType)); + else + { + std::uint8_t u[sizeof(IntType)]; + std::make_unsigned_t v = 0; - std::memcpy(&result, &v, sizeof(IntType)); - } + stream.read(reinterpret_cast(u), sizeof(IntType)); + for (std::size_t i = 0; i < sizeof(IntType); ++i) + v = (v << 8) | u[sizeof(IntType) - i - 1]; - return result; - } + std::memcpy(&result, &v, sizeof(IntType)); + } - // write_little_endian() is our utility to write an integer (signed or unsigned, any size) - // to a stream in little-endian order. We swap the byte order before the write if - // necessary to always write in little endian order, independently of the byte - // ordering of the compiling machine. - template - inline void write_little_endian(std::ostream& stream, IntType value) { + return result; +} - if (IsLittleEndian) - stream.write(reinterpret_cast(&value), sizeof(IntType)); - else - { - std::uint8_t u[sizeof(IntType)]; - typename std::make_unsigned::type v = value; - std::size_t i = 0; - // if constexpr to silence the warning about shift by 8 - if constexpr (sizeof(IntType) > 1) - { +// Utility to write an integer (signed or unsigned, any size) +// to a stream in little-endian order. We swap the byte order before the write if +// necessary to always write in little-endian order, independently of the byte +// ordering of the compiling machine. +template +inline void write_little_endian(std::ostream& stream, IntType value) { + + if (IsLittleEndian) + stream.write(reinterpret_cast(&value), sizeof(IntType)); + else + { + std::uint8_t u[sizeof(IntType)]; + std::make_unsigned_t v = value; + + std::size_t i = 0; + // if constexpr to silence the warning about shift by 8 + if constexpr (sizeof(IntType) > 1) + { for (; i + 1 < sizeof(IntType); ++i) { - u[i] = v; + u[i] = std::uint8_t(v); v >>= 8; } - } - u[i] = v; + } + u[i] = std::uint8_t(v); - stream.write(reinterpret_cast(u), sizeof(IntType)); - } - } + stream.write(reinterpret_cast(u), sizeof(IntType)); + } +} - // read_little_endian(s, out, N) : read integers in bulk from a little indian stream. - // This reads N integers from stream s and put them in array out. - template - inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) { - if (IsLittleEndian) - stream.read(reinterpret_cast(out), sizeof(IntType) * count); - else - for (std::size_t i = 0; i < count; ++i) - out[i] = read_little_endian(stream); - } - // write_little_endian(s, values, N) : write integers in bulk to a little indian stream. - // This takes N integers from array values and writes them on stream s. - template - inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) { - if (IsLittleEndian) - stream.write(reinterpret_cast(values), sizeof(IntType) * count); - else - for (std::size_t i = 0; i < count; ++i) - write_little_endian(stream, values[i]); - } +// Read integers in bulk from a little-endian stream. +// This reads N integers from stream s and puts them in array out. +template +inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) { + if (IsLittleEndian) + stream.read(reinterpret_cast(out), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + out[i] = read_little_endian(stream); +} + + +// Write integers in bulk to a little-endian stream. +// This takes N integers from array values and writes them on stream s. +template +inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) { + if (IsLittleEndian) + stream.write(reinterpret_cast(values), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + write_little_endian(stream, values[i]); +} + + +// Read N signed integers from the stream s, putting them in the array out. +// The stream is assumed to be compressed using the signed LEB128 format. +// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. +template +inline void read_leb_128(std::istream& stream, IntType* out, std::size_t count) { + + // Check the presence of our LEB128 magic string + char leb128MagicString[Leb128MagicStringSize]; + stream.read(leb128MagicString, Leb128MagicStringSize); + assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0); + + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + + const std::uint32_t BUF_SIZE = 4096; + std::uint8_t buf[BUF_SIZE]; + + auto bytes_left = read_little_endian(stream); + + std::uint32_t buf_pos = BUF_SIZE; + for (std::size_t i = 0; i < count; ++i) + { + IntType result = 0; + size_t shift = 0; + do + { + if (buf_pos == BUF_SIZE) + { + stream.read(reinterpret_cast(buf), std::min(bytes_left, BUF_SIZE)); + buf_pos = 0; + } + + std::uint8_t byte = buf[buf_pos++]; + --bytes_left; + result |= (byte & 0x7f) << shift; + shift += 7; + + if ((byte & 0x80) == 0) + { + out[i] = (sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0) + ? result + : result | ~((1 << shift) - 1); + break; + } + } while (shift < sizeof(IntType) * 8); + } + + assert(bytes_left == 0); +} + + +// Write signed integers to a stream with LEB128 compression. +// This takes N integers from array values, compresses them with +// the LEB128 algorithm and writes the result on the stream s. +// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. +template +inline void write_leb_128(std::ostream& stream, const IntType* values, std::size_t count) { + + // Write our LEB128 magic string + stream.write(Leb128MagicString, Leb128MagicStringSize); + + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + + std::uint32_t byte_count = 0; + for (std::size_t i = 0; i < count; ++i) + { + IntType value = values[i]; + std::uint8_t byte; + do + { + byte = value & 0x7f; + value >>= 7; + ++byte_count; + } while ((byte & 0x40) == 0 ? value != 0 : value != -1); + } + + write_little_endian(stream, byte_count); + + const std::uint32_t BUF_SIZE = 4096; + std::uint8_t buf[BUF_SIZE]; + std::uint32_t buf_pos = 0; + + auto flush = [&]() { + if (buf_pos > 0) + { + stream.write(reinterpret_cast(buf), buf_pos); + buf_pos = 0; + } + }; + + auto write = [&](std::uint8_t byte) { + buf[buf_pos++] = byte; + if (buf_pos == BUF_SIZE) + flush(); + }; + + for (std::size_t i = 0; i < count; ++i) + { + IntType value = values[i]; + while (true) + { + std::uint8_t byte = value & 0x7f; + value >>= 7; + if ((byte & 0x40) == 0 ? value == 0 : value == -1) + { + write(byte); + break; + } + write(byte | 0x80); + } + } + + flush(); +} } // namespace Stockfish::Eval::NNUE -#endif // #ifndef NNUE_COMMON_H_INCLUDED +#endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index fb867421..2b11adef 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,589 +21,824 @@ #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED #define NNUE_FEATURE_TRANSFORMER_H_INCLUDED -#include "nnue_common.h" -#include "nnue_architecture.h" +#include +#include +#include +#include +#include +#include -#include // std::memset() +#include "../position.h" +#include "../types.h" +#include "nnue_accumulator.h" +#include "nnue_architecture.h" +#include "nnue_common.h" namespace Stockfish::Eval::NNUE { - using BiasType = std::int16_t; - using WeightType = std::int16_t; - using PSQTWeightType = std::int32_t; +using BiasType = std::int16_t; +using WeightType = std::int16_t; +using PSQTWeightType = std::int32_t; - // If vector instructions are enabled, we update and refresh the - // accumulator tile by tile such that each tile fits in the CPU's - // vector registers. - #define VECTOR +// If vector instructions are enabled, we update and refresh the +// accumulator tile by tile such that each tile fits in the CPU's +// vector registers. +#define VECTOR - static_assert(PSQTBuckets % 8 == 0, - "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); +static_assert(PSQTBuckets % 8 == 0, + "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); - #ifdef USE_AVX512 - typedef __m512i vec_t; - typedef __m256i psqt_vec_t; - #define vec_load(a) _mm512_load_si512(a) - #define vec_store(a,b) _mm512_store_si512(a,b) - #define vec_add_16(a,b) _mm512_add_epi16(a,b) - #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) - #define vec_load_psqt(a) _mm256_load_si256(a) - #define vec_store_psqt(a,b) _mm256_store_si256(a,b) - #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) - #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) - #define vec_zero_psqt() _mm256_setzero_si256() - #define NumRegistersSIMD 32 +#ifdef USE_AVX512 +using vec_t = __m512i; +using psqt_vec_t = __m256i; + #define vec_load(a) _mm512_load_si512(a) + #define vec_store(a, b) _mm512_store_si512(a, b) + #define vec_add_16(a, b) _mm512_add_epi16(a, b) + #define vec_sub_16(a, b) _mm512_sub_epi16(a, b) + #define vec_mul_16(a, b) _mm512_mullo_epi16(a, b) + #define vec_zero() _mm512_setzero_epi32() + #define vec_set_16(a) _mm512_set1_epi16(a) + #define vec_max_16(a, b) _mm512_max_epi16(a, b) + #define vec_min_16(a, b) _mm512_min_epi16(a, b) + // Inverse permuted at load time + #define vec_msb_pack_16(a, b) \ + _mm512_packs_epi16(_mm512_srli_epi16(a, 7), _mm512_srli_epi16(b, 7)) + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a, b) _mm256_store_si256(a, b) + #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) + #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b) + #define vec_zero_psqt() _mm256_setzero_si256() + #define NumRegistersSIMD 16 + #define MaxChunkSize 64 - #elif USE_AVX2 - typedef __m256i vec_t; - typedef __m256i psqt_vec_t; - #define vec_load(a) _mm256_load_si256(a) - #define vec_store(a,b) _mm256_store_si256(a,b) - #define vec_add_16(a,b) _mm256_add_epi16(a,b) - #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) - #define vec_load_psqt(a) _mm256_load_si256(a) - #define vec_store_psqt(a,b) _mm256_store_si256(a,b) - #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) - #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) - #define vec_zero_psqt() _mm256_setzero_si256() - #define NumRegistersSIMD 16 +#elif USE_AVX2 +using vec_t = __m256i; +using psqt_vec_t = __m256i; + #define vec_load(a) _mm256_load_si256(a) + #define vec_store(a, b) _mm256_store_si256(a, b) + #define vec_add_16(a, b) _mm256_add_epi16(a, b) + #define vec_sub_16(a, b) _mm256_sub_epi16(a, b) + #define vec_mul_16(a, b) _mm256_mullo_epi16(a, b) + #define vec_zero() _mm256_setzero_si256() + #define vec_set_16(a) _mm256_set1_epi16(a) + #define vec_max_16(a, b) _mm256_max_epi16(a, b) + #define vec_min_16(a, b) _mm256_min_epi16(a, b) + // Inverse permuted at load time + #define vec_msb_pack_16(a, b) \ + _mm256_packs_epi16(_mm256_srli_epi16(a, 7), _mm256_srli_epi16(b, 7)) + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a, b) _mm256_store_si256(a, b) + #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) + #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b) + #define vec_zero_psqt() _mm256_setzero_si256() + #define NumRegistersSIMD 16 + #define MaxChunkSize 32 - #elif USE_SSE2 - typedef __m128i vec_t; - typedef __m128i psqt_vec_t; - #define vec_load(a) (*(a)) - #define vec_store(a,b) *(a)=(b) - #define vec_add_16(a,b) _mm_add_epi16(a,b) - #define vec_sub_16(a,b) _mm_sub_epi16(a,b) - #define vec_load_psqt(a) (*(a)) - #define vec_store_psqt(a,b) *(a)=(b) - #define vec_add_psqt_32(a,b) _mm_add_epi32(a,b) - #define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b) - #define vec_zero_psqt() _mm_setzero_si128() - #define NumRegistersSIMD (Is64Bit ? 16 : 8) +#elif USE_SSE2 +using vec_t = __m128i; +using psqt_vec_t = __m128i; + #define vec_load(a) (*(a)) + #define vec_store(a, b) *(a) = (b) + #define vec_add_16(a, b) _mm_add_epi16(a, b) + #define vec_sub_16(a, b) _mm_sub_epi16(a, b) + #define vec_mul_16(a, b) _mm_mullo_epi16(a, b) + #define vec_zero() _mm_setzero_si128() + #define vec_set_16(a) _mm_set1_epi16(a) + #define vec_max_16(a, b) _mm_max_epi16(a, b) + #define vec_min_16(a, b) _mm_min_epi16(a, b) + #define vec_msb_pack_16(a, b) _mm_packs_epi16(_mm_srli_epi16(a, 7), _mm_srli_epi16(b, 7)) + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a, b) *(a) = (b) + #define vec_add_psqt_32(a, b) _mm_add_epi32(a, b) + #define vec_sub_psqt_32(a, b) _mm_sub_epi32(a, b) + #define vec_zero_psqt() _mm_setzero_si128() + #define NumRegistersSIMD (Is64Bit ? 16 : 8) + #define MaxChunkSize 16 - #elif USE_MMX - typedef __m64 vec_t; - typedef __m64 psqt_vec_t; - #define vec_load(a) (*(a)) - #define vec_store(a,b) *(a)=(b) - #define vec_add_16(a,b) _mm_add_pi16(a,b) - #define vec_sub_16(a,b) _mm_sub_pi16(a,b) - #define vec_load_psqt(a) (*(a)) - #define vec_store_psqt(a,b) *(a)=(b) - #define vec_add_psqt_32(a,b) _mm_add_pi32(a,b) - #define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b) - #define vec_zero_psqt() _mm_setzero_si64() - #define NumRegistersSIMD 8 +#elif USE_NEON +using vec_t = int16x8_t; +using psqt_vec_t = int32x4_t; + #define vec_load(a) (*(a)) + #define vec_store(a, b) *(a) = (b) + #define vec_add_16(a, b) vaddq_s16(a, b) + #define vec_sub_16(a, b) vsubq_s16(a, b) + #define vec_mul_16(a, b) vmulq_s16(a, b) + #define vec_zero() \ + vec_t { 0 } + #define vec_set_16(a) vdupq_n_s16(a) + #define vec_max_16(a, b) vmaxq_s16(a, b) + #define vec_min_16(a, b) vminq_s16(a, b) +inline vec_t vec_msb_pack_16(vec_t a, vec_t b) { + const int8x8_t shifta = vshrn_n_s16(a, 7); + const int8x8_t shiftb = vshrn_n_s16(b, 7); + const int8x16_t compacted = vcombine_s8(shifta, shiftb); + return *reinterpret_cast(&compacted); +} + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a, b) *(a) = (b) + #define vec_add_psqt_32(a, b) vaddq_s32(a, b) + #define vec_sub_psqt_32(a, b) vsubq_s32(a, b) + #define vec_zero_psqt() \ + psqt_vec_t { 0 } + #define NumRegistersSIMD 16 + #define MaxChunkSize 16 - #elif USE_NEON - typedef int16x8_t vec_t; - typedef int32x4_t psqt_vec_t; - #define vec_load(a) (*(a)) - #define vec_store(a,b) *(a)=(b) - #define vec_add_16(a,b) vaddq_s16(a,b) - #define vec_sub_16(a,b) vsubq_s16(a,b) - #define vec_load_psqt(a) (*(a)) - #define vec_store_psqt(a,b) *(a)=(b) - #define vec_add_psqt_32(a,b) vaddq_s32(a,b) - #define vec_sub_psqt_32(a,b) vsubq_s32(a,b) - #define vec_zero_psqt() psqt_vec_t{0} - #define NumRegistersSIMD 16 +#else + #undef VECTOR - #else - #undef VECTOR - - #endif +#endif - #ifdef VECTOR +#ifdef VECTOR - // Compute optimal SIMD register count for feature transformer accumulation. + // Compute optimal SIMD register count for feature transformer accumulation. - // We use __m* types as template arguments, which causes GCC to emit warnings - // about losing some attribute information. This is irrelevant to us as we - // only take their size, so the following pragma are harmless. - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wignored-attributes" + // We use __m* types as template arguments, which causes GCC to emit warnings + // about losing some attribute information. This is irrelevant to us as we + // only take their size, so the following pragma are harmless. + #if defined(__GNUC__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif - template - static constexpr int BestRegisterCount() - { - #define RegisterSize sizeof(SIMDRegisterType) - #define LaneSize sizeof(LaneType) +template +static constexpr int BestRegisterCount() { + #define RegisterSize sizeof(SIMDRegisterType) + #define LaneSize sizeof(LaneType) - static_assert(RegisterSize >= LaneSize); - static_assert(MaxRegisters <= NumRegistersSIMD); - static_assert(MaxRegisters > 0); - static_assert(NumRegistersSIMD > 0); - static_assert(RegisterSize % LaneSize == 0); - static_assert((NumLanes * LaneSize) % RegisterSize == 0); + static_assert(RegisterSize >= LaneSize); + static_assert(MaxRegisters <= NumRegistersSIMD); + static_assert(MaxRegisters > 0); + static_assert(NumRegistersSIMD > 0); + static_assert(RegisterSize % LaneSize == 0); + static_assert((NumLanes * LaneSize) % RegisterSize == 0); - const int ideal = (NumLanes * LaneSize) / RegisterSize; - if (ideal <= MaxRegisters) - return ideal; + const int ideal = (NumLanes * LaneSize) / RegisterSize; + if (ideal <= MaxRegisters) + return ideal; - // Look for the largest divisor of the ideal register count that is smaller than MaxRegisters - for (int divisor = MaxRegisters; divisor > 1; --divisor) - if (ideal % divisor == 0) - return divisor; + // Look for the largest divisor of the ideal register count that is smaller than MaxRegisters + for (int divisor = MaxRegisters; divisor > 1; --divisor) + if (ideal % divisor == 0) + return divisor; - return 1; - } - - static constexpr int NumRegs = BestRegisterCount(); - static constexpr int NumPsqtRegs = BestRegisterCount(); - - #pragma GCC diagnostic pop - - #endif + return 1; +} + #if defined(__GNUC__) + #pragma GCC diagnostic pop + #endif +#endif +// Input feature converter +template StateInfo::*accPtr> +class FeatureTransformer { - // Input feature converter - class FeatureTransformer { - - private: // Number of output dimensions for one side static constexpr IndexType HalfDimensions = TransformedFeatureDimensions; - #ifdef VECTOR - static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; + private: +#ifdef VECTOR + static constexpr int NumRegs = + BestRegisterCount(); + static constexpr int NumPsqtRegs = + BestRegisterCount(); + + static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4; static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions"); static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets"); - #endif +#endif public: // Output type using OutputType = TransformedFeatureType; // Number of input/output dimensions - static constexpr IndexType InputDimensions = FeatureSet::Dimensions; + static constexpr IndexType InputDimensions = FeatureSet::Dimensions; static constexpr IndexType OutputDimensions = HalfDimensions; // Size of forward propagation buffer - static constexpr std::size_t BufferSize = - OutputDimensions * sizeof(OutputType); + static constexpr std::size_t BufferSize = OutputDimensions * sizeof(OutputType); // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value() { - return FeatureSet::HashValue ^ (OutputDimensions * 2); + return FeatureSet::HashValue ^ (OutputDimensions * 2); + } + + static constexpr void order_packs([[maybe_unused]] uint64_t* v) { +#if defined(USE_AVX512) // _mm512_packs_epi16 ordering + uint64_t tmp0 = v[2], tmp1 = v[3]; + v[2] = v[8], v[3] = v[9]; + v[8] = v[4], v[9] = v[5]; + v[4] = tmp0, v[5] = tmp1; + tmp0 = v[6], tmp1 = v[7]; + v[6] = v[10], v[7] = v[11]; + v[10] = v[12], v[11] = v[13]; + v[12] = tmp0, v[13] = tmp1; +#elif defined(USE_AVX2) // _mm256_packs_epi16 ordering + std::swap(v[2], v[4]); + std::swap(v[3], v[5]); +#endif + } + + static constexpr void inverse_order_packs([[maybe_unused]] uint64_t* v) { +#if defined(USE_AVX512) // Inverse _mm512_packs_epi16 ordering + uint64_t tmp0 = v[2], tmp1 = v[3]; + v[2] = v[4], v[3] = v[5]; + v[4] = v[8], v[5] = v[9]; + v[8] = tmp0, v[9] = tmp1; + tmp0 = v[6], tmp1 = v[7]; + v[6] = v[12], v[7] = v[13]; + v[12] = v[10], v[13] = v[11]; + v[10] = tmp0, v[11] = tmp1; +#elif defined(USE_AVX2) // Inverse _mm256_packs_epi16 ordering + std::swap(v[2], v[4]); + std::swap(v[3], v[5]); +#endif + } + + void permute_weights([[maybe_unused]] void (*order_fn)(uint64_t*)) const { +#if defined(USE_AVX2) + #if defined(USE_AVX512) + constexpr IndexType di = 16; + #else + constexpr IndexType di = 8; + #endif + uint64_t* b = reinterpret_cast(const_cast(&biases[0])); + for (IndexType i = 0; i < HalfDimensions * sizeof(BiasType) / sizeof(uint64_t); i += di) + order_fn(&b[i]); + + for (IndexType j = 0; j < InputDimensions; ++j) + { + uint64_t* w = + reinterpret_cast(const_cast(&weights[j * HalfDimensions])); + for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(uint64_t); + i += di) + order_fn(&w[i]); + } +#endif } // Read network parameters bool read_parameters(std::istream& stream) { - read_little_endian(stream, biases , HalfDimensions ); - read_little_endian(stream, weights , HalfDimensions * InputDimensions); - read_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); + read_leb_128(stream, biases, HalfDimensions); + read_leb_128(stream, weights, HalfDimensions * InputDimensions); + read_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); - return !stream.fail(); + permute_weights(inverse_order_packs); + return !stream.fail(); } // Write network parameters bool write_parameters(std::ostream& stream) const { - write_little_endian(stream, biases , HalfDimensions ); - write_little_endian(stream, weights , HalfDimensions * InputDimensions); - write_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); + permute_weights(order_packs); - return !stream.fail(); + write_leb_128(stream, biases, HalfDimensions); + write_leb_128(stream, weights, HalfDimensions * InputDimensions); + write_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); + + permute_weights(inverse_order_packs); + return !stream.fail(); } // Convert input features - std::int32_t transform(const Position& pos, OutputType* output, int bucket) const { - update_accumulator(pos, WHITE); - update_accumulator(pos, BLACK); + std::int32_t transform(const Position& pos, + AccumulatorCaches::Cache* cache, + OutputType* output, + int bucket) const { + update_accumulator(pos, cache); + update_accumulator(pos, cache); - const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; - const auto& accumulation = pos.state()->accumulator.accumulation; - const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation; + const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; + const auto& psqtAccumulation = (pos.state()->*accPtr).psqtAccumulation; + const auto psqt = + (psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket]) + / 2; - const auto psqt = ( - psqtAccumulation[perspectives[0]][bucket] - - psqtAccumulation[perspectives[1]][bucket] - ) / 2; + const auto& accumulation = (pos.state()->*accPtr).accumulation; + for (IndexType p = 0; p < 2; ++p) + { + const IndexType offset = (HalfDimensions / 2) * p; - for (IndexType p = 0; p < 2; ++p) - { - const IndexType offset = (HalfDimensions / 2) * p; +#if defined(VECTOR) -#if defined(USE_AVX512) + constexpr IndexType OutputChunkSize = MaxChunkSize; + static_assert((HalfDimensions / 2) % OutputChunkSize == 0); + constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; - constexpr IndexType OutputChunkSize = 512 / 8; - static_assert((HalfDimensions / 2) % OutputChunkSize == 0); - constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; + const vec_t Zero = vec_zero(); + const vec_t One = vec_set_16(127); - const __m512i Zero = _mm512_setzero_si512(); - const __m512i One = _mm512_set1_epi16(127); - const __m512i Control = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7); + const vec_t* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); + const vec_t* in1 = + reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); + vec_t* out = reinterpret_cast(output + offset); - const __m512i* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); - const __m512i* in1 = reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); - __m512i* out = reinterpret_cast< __m512i*>(output + offset); + for (IndexType j = 0; j < NumOutputChunks; ++j) + { + const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero); + const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero); + const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero); + const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero); - for (IndexType j = 0; j < NumOutputChunks; j += 1) - { - const __m512i sum0a = _mm512_max_epi16(_mm512_min_epi16(in0[j * 2 + 0], One), Zero); - const __m512i sum0b = _mm512_max_epi16(_mm512_min_epi16(in0[j * 2 + 1], One), Zero); - const __m512i sum1a = _mm512_max_epi16(_mm512_min_epi16(in1[j * 2 + 0], One), Zero); - const __m512i sum1b = _mm512_max_epi16(_mm512_min_epi16(in1[j * 2 + 1], One), Zero); + const vec_t pa = vec_mul_16(sum0a, sum1a); + const vec_t pb = vec_mul_16(sum0b, sum1b); - const __m512i pa = _mm512_srli_epi16(_mm512_mullo_epi16(sum0a, sum1a), 7); - const __m512i pb = _mm512_srli_epi16(_mm512_mullo_epi16(sum0b, sum1b), 7); - - out[j] = _mm512_permutexvar_epi64(Control, _mm512_packs_epi16(pa, pb)); - } - -#elif defined(USE_AVX2) - - constexpr IndexType OutputChunkSize = 256 / 8; - static_assert((HalfDimensions / 2) % OutputChunkSize == 0); - constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; - - const __m256i Zero = _mm256_setzero_si256(); - const __m256i One = _mm256_set1_epi16(127); - constexpr int Control = 0b11011000; - - const __m256i* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); - const __m256i* in1 = reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); - __m256i* out = reinterpret_cast< __m256i*>(output + offset); - - for (IndexType j = 0; j < NumOutputChunks; j += 1) - { - const __m256i sum0a = _mm256_max_epi16(_mm256_min_epi16(in0[j * 2 + 0], One), Zero); - const __m256i sum0b = _mm256_max_epi16(_mm256_min_epi16(in0[j * 2 + 1], One), Zero); - const __m256i sum1a = _mm256_max_epi16(_mm256_min_epi16(in1[j * 2 + 0], One), Zero); - const __m256i sum1b = _mm256_max_epi16(_mm256_min_epi16(in1[j * 2 + 1], One), Zero); - - const __m256i pa = _mm256_srli_epi16(_mm256_mullo_epi16(sum0a, sum1a), 7); - const __m256i pb = _mm256_srli_epi16(_mm256_mullo_epi16(sum0b, sum1b), 7); - - out[j] = _mm256_permute4x64_epi64(_mm256_packs_epi16(pa, pb), Control); - } - -#elif defined(USE_SSE2) - - constexpr IndexType OutputChunkSize = 128 / 8; - static_assert((HalfDimensions / 2) % OutputChunkSize == 0); - constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; - - const __m128i Zero = _mm_setzero_si128(); - const __m128i One = _mm_set1_epi16(127); - - const __m128i* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); - const __m128i* in1 = reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); - __m128i* out = reinterpret_cast< __m128i*>(output + offset); - - for (IndexType j = 0; j < NumOutputChunks; j += 1) - { - const __m128i sum0a = _mm_max_epi16(_mm_min_epi16(in0[j * 2 + 0], One), Zero); - const __m128i sum0b = _mm_max_epi16(_mm_min_epi16(in0[j * 2 + 1], One), Zero); - const __m128i sum1a = _mm_max_epi16(_mm_min_epi16(in1[j * 2 + 0], One), Zero); - const __m128i sum1b = _mm_max_epi16(_mm_min_epi16(in1[j * 2 + 1], One), Zero); - - const __m128i pa = _mm_srli_epi16(_mm_mullo_epi16(sum0a, sum1a), 7); - const __m128i pb = _mm_srli_epi16(_mm_mullo_epi16(sum0b, sum1b), 7); - - out[j] = _mm_packs_epi16(pa, pb); - } - -#elif defined(USE_NEON) - - constexpr IndexType OutputChunkSize = 128 / 8; - static_assert((HalfDimensions / 2) % OutputChunkSize == 0); - constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; - - const int16x8_t Zero = vdupq_n_s16(0); - const int16x8_t One = vdupq_n_s16(127); - - const int16x8_t* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); - const int16x8_t* in1 = reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); - int8x16_t* out = reinterpret_cast< int8x16_t*>(output + offset); - - for (IndexType j = 0; j < NumOutputChunks; j += 1) - { - const int16x8_t sum0a = vmaxq_s16(vminq_s16(in0[j * 2 + 0], One), Zero); - const int16x8_t sum0b = vmaxq_s16(vminq_s16(in0[j * 2 + 1], One), Zero); - const int16x8_t sum1a = vmaxq_s16(vminq_s16(in1[j * 2 + 0], One), Zero); - const int16x8_t sum1b = vmaxq_s16(vminq_s16(in1[j * 2 + 1], One), Zero); - - const int8x8_t pa = vshrn_n_s16(vmulq_s16(sum0a, sum1a), 7); - const int8x8_t pb = vshrn_n_s16(vmulq_s16(sum0b, sum1b), 7); - - out[j] = vcombine_s8(pa, pb); - } + out[j] = vec_msb_pack_16(pa, pb); + } #else - for (IndexType j = 0; j < HalfDimensions / 2; ++j) { - BiasType sum0 = accumulation[static_cast(perspectives[p])][j + 0]; - BiasType sum1 = accumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; - sum0 = std::max(0, std::min(127, sum0)); - sum1 = std::max(0, std::min(127, sum1)); - output[offset + j] = static_cast(sum0 * sum1 / 128); - } + for (IndexType j = 0; j < HalfDimensions / 2; ++j) + { + BiasType sum0 = accumulation[static_cast(perspectives[p])][j + 0]; + BiasType sum1 = + accumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; + sum0 = std::clamp(sum0, 0, 127); + sum1 = std::clamp(sum1, 0, 127); + output[offset + j] = static_cast(unsigned(sum0 * sum1) / 128); + } #endif - } - - return psqt; - - } // end of function transform() + } + return psqt; + } // end of function transform() + void hint_common_access(const Position& pos, + AccumulatorCaches::Cache* cache) const { + hint_common_access_for_perspective(pos, cache); + hint_common_access_for_perspective(pos, cache); + } private: - void update_accumulator(const Position& pos, const Color perspective) const { + template + [[nodiscard]] std::pair + try_find_computed_accumulator(const Position& pos) const { + // Look for a usable accumulator of an earlier position. We keep track + // of the estimated gain in terms of features to be added/subtracted. + StateInfo *st = pos.state(), *next = nullptr; + int gain = FeatureSet::refresh_cost(pos); + while (st->previous && !(st->*accPtr).computed[Perspective]) + { + // This governs when a full feature refresh is needed and how many + // updates are better than just one full refresh. + if (FeatureSet::requires_refresh(st, Perspective) + || (gain -= FeatureSet::update_cost(st) + 1) < 0) + break; + next = st; + st = st->previous; + } + return {st, next}; + } - // The size must be enough to contain the largest possible update. - // That might depend on the feature set and generally relies on the - // feature set's update cost calculation to be correct and never - // allow updates with more added/removed features than MaxActiveDimensions. + // NOTE: The parameter states_to_update is an array of position states. + // All states must be sequential, that is states_to_update[i] must either be reachable + // by repeatedly applying ->previous from states_to_update[i+1]. + // computed_st must be reachable by repeatedly applying ->previous on + // states_to_update[0]. + template + void update_accumulator_incremental(const Position& pos, + StateInfo* computed_st, + StateInfo* states_to_update[N]) const { + static_assert(N > 0); + assert([&]() { + for (size_t i = 0; i < N; ++i) + { + if (states_to_update[i] == nullptr) + return false; + } + return true; + }()); - #ifdef VECTOR - // Gcc-10.2 unnecessarily spills AVX2 registers if this array - // is defined in the VECTOR code below, once in each branch - vec_t acc[NumRegs]; - psqt_vec_t psqt[NumPsqtRegs]; - #endif - - // Look for a usable accumulator of an earlier position. We keep track - // of the estimated gain in terms of features to be added/subtracted. - StateInfo *st = pos.state(), *next = nullptr; - int gain = FeatureSet::refresh_cost(pos); - while (st->previous && !st->accumulator.computed[perspective]) - { - // This governs when a full feature refresh is needed and how many - // updates are better than just one full refresh. - if ( FeatureSet::requires_refresh(st, perspective) - || (gain -= FeatureSet::update_cost(st) + 1) < 0) - break; - next = st; - st = st->previous; - } - - if (st->accumulator.computed[perspective]) - { - if (next == nullptr) - return; - - // Update incrementally in two steps. First, we update the "next" - // accumulator. Then, we update the current accumulator (pos.state()). +#ifdef VECTOR + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; +#endif + // Update incrementally going back through states_to_update. // Gather all features to be updated. - const Square ksq = pos.square(perspective); - FeatureSet::IndexList removed[2], added[2]; - FeatureSet::append_changed_indices( - ksq, next->dirtyPiece, perspective, removed[0], added[0]); - for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous) - FeatureSet::append_changed_indices( - ksq, st2->dirtyPiece, perspective, removed[1], added[1]); + const Square ksq = pos.square(Perspective); - // Mark the accumulators as computed. - next->accumulator.computed[perspective] = true; - pos.state()->accumulator.computed[perspective] = true; + // The size must be enough to contain the largest possible update. + // That might depend on the feature set and generally relies on the + // feature set's update cost calculation to be correct and never allow + // updates with more added/removed features than MaxActiveDimensions. + FeatureSet::IndexList removed[N], added[N]; + + for (int i = N - 1; i >= 0; --i) + { + (states_to_update[i]->*accPtr).computed[Perspective] = true; + + const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1]; + + for (StateInfo* st2 = states_to_update[i]; st2 != end_state; st2 = st2->previous) + FeatureSet::append_changed_indices(ksq, st2->dirtyPiece, removed[i], + added[i]); + } + + StateInfo* st = computed_st; // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. - StateInfo *states_to_update[3] = - { next, next == pos.state() ? nullptr : pos.state(), nullptr }; - #ifdef VECTOR - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) - { - // Load accumulator - auto accTile = reinterpret_cast( - &st->accumulator.accumulation[perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_load(&accTile[k]); +#ifdef VECTOR + + if (N == 1 && (removed[0].size() == 1 || removed[0].size() == 2) && added[0].size() == 1) + { + assert(states_to_update[0]); + + auto accIn = + reinterpret_cast(&(st->*accPtr).accumulation[Perspective][0]); + auto accOut = reinterpret_cast( + &(states_to_update[0]->*accPtr).accumulation[Perspective][0]); + + const IndexType offsetR0 = HalfDimensions * removed[0][0]; + auto columnR0 = reinterpret_cast(&weights[offsetR0]); + const IndexType offsetA = HalfDimensions * added[0][0]; + auto columnA = reinterpret_cast(&weights[offsetA]); + + if (removed[0].size() == 1) + { + for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); + ++k) + accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]); + } + else + { + const IndexType offsetR1 = HalfDimensions * removed[0][1]; + auto columnR1 = reinterpret_cast(&weights[offsetR1]); + + for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); + ++k) + accOut[k] = vec_sub_16(vec_add_16(accIn[k], columnA[k]), + vec_add_16(columnR0[k], columnR1[k])); + } + + auto accPsqtIn = + reinterpret_cast(&(st->*accPtr).psqtAccumulation[Perspective][0]); + auto accPsqtOut = reinterpret_cast( + &(states_to_update[0]->*accPtr).psqtAccumulation[Perspective][0]); + + const IndexType offsetPsqtR0 = PSQTBuckets * removed[0][0]; + auto columnPsqtR0 = reinterpret_cast(&psqtWeights[offsetPsqtR0]); + const IndexType offsetPsqtA = PSQTBuckets * added[0][0]; + auto columnPsqtA = reinterpret_cast(&psqtWeights[offsetPsqtA]); + + if (removed[0].size() == 1) + { + for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); + ++k) + accPsqtOut[k] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[k], columnPsqtR0[k]), + columnPsqtA[k]); + } + else + { + const IndexType offsetPsqtR1 = PSQTBuckets * removed[0][1]; + auto columnPsqtR1 = reinterpret_cast(&psqtWeights[offsetPsqtR1]); + + for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); + ++k) + accPsqtOut[k] = + vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[k], columnPsqtA[k]), + vec_add_psqt_32(columnPsqtR0[k], columnPsqtR1[k])); + } + } + else + { + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + { + // Load accumulator + auto accTileIn = reinterpret_cast( + &(st->*accPtr).accumulation[Perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_load(&accTileIn[k]); + + for (IndexType i = 0; i < N; ++i) + { + // Difference calculation for the deactivated features + for (const auto index : removed[i]) + { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + // Store accumulator + auto accTileOut = reinterpret_cast( + &(states_to_update[i]->*accPtr).accumulation[Perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_store(&accTileOut[k], acc[k]); + } + } + + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + // Load accumulator + auto accTilePsqtIn = reinterpret_cast( + &(st->*accPtr).psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_load_psqt(&accTilePsqtIn[k]); + + for (IndexType i = 0; i < N; ++i) + { + // Difference calculation for the deactivated features + for (const auto index : removed[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + // Store accumulator + auto accTilePsqtOut = reinterpret_cast( + &(states_to_update[i]->*accPtr) + .psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqtOut[k], psqt[k]); + } + } + } +#else + for (IndexType i = 0; i < N; ++i) + { + std::memcpy((states_to_update[i]->*accPtr).accumulation[Perspective], + (st->*accPtr).accumulation[Perspective], HalfDimensions * sizeof(BiasType)); + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + (states_to_update[i]->*accPtr).psqtAccumulation[Perspective][k] = + (st->*accPtr).psqtAccumulation[Perspective][k]; + + st = states_to_update[i]; - for (IndexType i = 0; states_to_update[i]; ++i) - { // Difference calculation for the deactivated features for (const auto index : removed[i]) { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); + const IndexType offset = HalfDimensions * index; + for (IndexType j = 0; j < HalfDimensions; ++j) + (st->*accPtr).accumulation[Perspective][j] -= weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + (st->*accPtr).psqtAccumulation[Perspective][k] -= + psqtWeights[index * PSQTBuckets + k]; } // Difference calculation for the activated features for (const auto index : added[i]) { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); + const IndexType offset = HalfDimensions * index; + for (IndexType j = 0; j < HalfDimensions; ++j) + (st->*accPtr).accumulation[Perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + (st->*accPtr).psqtAccumulation[Perspective][k] += + psqtWeights[index * PSQTBuckets + k]; } - - // Store accumulator - accTile = reinterpret_cast( - &states_to_update[i]->accumulator.accumulation[perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - vec_store(&accTile[k], acc[k]); - } } - - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) - { - // Load accumulator - auto accTilePsqt = reinterpret_cast( - &st->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_load_psqt(&accTilePsqt[k]); - - for (IndexType i = 0; states_to_update[i]; ++i) - { - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); - } - - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - - // Store accumulator - accTilePsqt = reinterpret_cast( - &states_to_update[i]->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqt[k], psqt[k]); - } - } - - #else - for (IndexType i = 0; states_to_update[i]; ++i) - { - std::memcpy(states_to_update[i]->accumulator.accumulation[perspective], - st->accumulator.accumulation[perspective], - HalfDimensions * sizeof(BiasType)); - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - states_to_update[i]->accumulator.psqtAccumulation[perspective][k] = st->accumulator.psqtAccumulation[perspective][k]; - - st = states_to_update[i]; - - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = HalfDimensions * index; - - for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[perspective][j] -= weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[perspective][k] -= psqtWeights[index * PSQTBuckets + k]; - } - - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = HalfDimensions * index; - - for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[perspective][j] += weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k]; - } - } - #endif - } - else - { - // Refresh the accumulator - auto& accumulator = pos.state()->accumulator; - accumulator.computed[perspective] = true; - FeatureSet::IndexList active; - FeatureSet::append_active_indices(pos, perspective, active); - - #ifdef VECTOR - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) - { - auto biasesTile = reinterpret_cast( - &biases[j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = biasesTile[k]; - - for (const auto index : active) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - - for (unsigned k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - - auto accTile = reinterpret_cast( - &accumulator.accumulation[perspective][j * TileHeight]); - for (unsigned k = 0; k < NumRegs; k++) - vec_store(&accTile[k], acc[k]); - } - - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) - { - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_zero_psqt(); - - for (const auto index : active) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - - auto accTilePsqt = reinterpret_cast( - &accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqt[k], psqt[k]); - } - - #else - std::memcpy(accumulator.accumulation[perspective], biases, - HalfDimensions * sizeof(BiasType)); - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[perspective][k] = 0; - - for (const auto index : active) - { - const IndexType offset = HalfDimensions * index; - - for (IndexType j = 0; j < HalfDimensions; ++j) - accumulator.accumulation[perspective][j] += weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k]; - } - #endif - } - - #if defined(USE_MMX) - _mm_empty(); - #endif +#endif } + template + void update_accumulator_refresh_cache(const Position& pos, + AccumulatorCaches::Cache* cache) const { + assert(cache != nullptr); + + Square ksq = pos.square(Perspective); + auto& entry = (*cache)[ksq][Perspective]; + FeatureSet::IndexList removed, added; + + for (Color c : {WHITE, BLACK}) + { + for (PieceType pt = PAWN; pt <= KING; ++pt) + { + const Piece piece = make_piece(c, pt); + const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt]; + const Bitboard newBB = pos.pieces(c, pt); + Bitboard toRemove = oldBB & ~newBB; + Bitboard toAdd = newBB & ~oldBB; + + while (toRemove) + { + Square sq = pop_lsb(toRemove); + removed.push_back(FeatureSet::make_index(sq, piece, ksq)); + } + while (toAdd) + { + Square sq = pop_lsb(toAdd); + added.push_back(FeatureSet::make_index(sq, piece, ksq)); + } + } + } + + auto& accumulator = pos.state()->*accPtr; + accumulator.computed[Perspective] = true; + +#ifdef VECTOR + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; + + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + { + auto entryTile = reinterpret_cast(&entry.accumulation[j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = entryTile[k]; + + int i = 0; + for (; i < int(std::min(removed.size(), added.size())); ++i) + { + IndexType indexR = removed[i]; + const IndexType offsetR = HalfDimensions * indexR + j * TileHeight; + auto columnR = reinterpret_cast(&weights[offsetR]); + IndexType indexA = added[i]; + const IndexType offsetA = HalfDimensions * indexA + j * TileHeight; + auto columnA = reinterpret_cast(&weights[offsetA]); + + for (unsigned k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]), columnA[k]); + } + for (; i < int(removed.size()); ++i) + { + IndexType index = removed[i]; + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + + for (unsigned k = 0; k < NumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + for (; i < int(added.size()); ++i) + { + IndexType index = added[i]; + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + + for (unsigned k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + for (IndexType k = 0; k < NumRegs; k++) + vec_store(&entryTile[k], acc[k]); + } + + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + auto entryTilePsqt = + reinterpret_cast(&entry.psqtAccumulation[j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = entryTilePsqt[k]; + + for (int i = 0; i < int(removed.size()); ++i) + { + IndexType index = removed[i]; + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); + } + for (int i = 0; i < int(added.size()); ++i) + { + IndexType index = added[i]; + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&entryTilePsqt[k], psqt[k]); + } + +#else + + for (const auto index : removed) + { + const IndexType offset = HalfDimensions * index; + for (IndexType j = 0; j < HalfDimensions; ++j) + entry.accumulation[j] -= weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + entry.psqtAccumulation[k] -= psqtWeights[index * PSQTBuckets + k]; + } + for (const auto index : added) + { + const IndexType offset = HalfDimensions * index; + for (IndexType j = 0; j < HalfDimensions; ++j) + entry.accumulation[j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k]; + } + +#endif + + // The accumulator of the refresh entry has been updated. + // Now copy its content to the actual accumulator we were refreshing + + std::memcpy(accumulator.accumulation[Perspective], entry.accumulation, + sizeof(BiasType) * HalfDimensions); + + std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation, + sizeof(int32_t) * PSQTBuckets); + + for (Color c : {WHITE, BLACK}) + entry.byColorBB[c] = pos.pieces(c); + + for (PieceType pt = PAWN; pt <= KING; ++pt) + entry.byTypeBB[pt] = pos.pieces(pt); + } + + template + void hint_common_access_for_perspective(const Position& pos, + AccumulatorCaches::Cache* cache) const { + + // Works like update_accumulator, but performs less work. + // Updates ONLY the accumulator for pos. + + // Look for a usable accumulator of an earlier position. We keep track + // of the estimated gain in terms of features to be added/subtracted. + // Fast early exit. + if ((pos.state()->*accPtr).computed[Perspective]) + return; + + auto [oldest_st, _] = try_find_computed_accumulator(pos); + + if ((oldest_st->*accPtr).computed[Perspective]) + { + // Only update current position accumulator to minimize work. + StateInfo* states_to_update[1] = {pos.state()}; + update_accumulator_incremental(pos, oldest_st, states_to_update); + } + else + update_accumulator_refresh_cache(pos, cache); + } + + template + void update_accumulator(const Position& pos, + AccumulatorCaches::Cache* cache) const { + + auto [oldest_st, next] = try_find_computed_accumulator(pos); + + if ((oldest_st->*accPtr).computed[Perspective]) + { + if (next == nullptr) + return; + + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. + // Currently we update 2 accumulators. + // 1. for the current position + // 2. the next accumulator after the computed one + // The heuristic may change in the future. + if (next == pos.state()) + { + StateInfo* states_to_update[1] = {next}; + + update_accumulator_incremental(pos, oldest_st, states_to_update); + } + else + { + StateInfo* states_to_update[2] = {next, pos.state()}; + + update_accumulator_incremental(pos, oldest_st, states_to_update); + } + } + else + update_accumulator_refresh_cache(pos, cache); + } + + template + friend struct AccumulatorCaches::Cache; + alignas(CacheLineSize) BiasType biases[HalfDimensions]; alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions]; alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets]; - }; +}; } // namespace Stockfish::Eval::NNUE -#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED +#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp new file mode 100644 index 00000000..bf73a58b --- /dev/null +++ b/src/nnue/nnue_misc.cpp @@ -0,0 +1,202 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Code for calculating NNUE evaluation function + +#include "nnue_misc.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../evaluate.h" +#include "../position.h" +#include "../types.h" +#include "../uci.h" +#include "network.h" +#include "nnue_accumulator.h" + +namespace Stockfish::Eval::NNUE { + + +constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); + + +void hint_common_parent_position(const Position& pos, + const Networks& networks, + AccumulatorCaches& caches) { + + int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move())); + if (simpleEvalAbs > Eval::SmallNetThreshold) + networks.small.hint_common_access(pos, &caches.small); + else + networks.big.hint_common_access(pos, &caches.big); +} + +namespace { +// Converts a Value into (centi)pawns and writes it in a buffer. +// The buffer must have capacity for at least 5 chars. +void format_cp_compact(Value v, char* buffer, const Position& pos) { + + buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); + + int cp = std::abs(UCIEngine::to_cp(v, pos)); + if (cp >= 10000) + { + buffer[1] = '0' + cp / 10000; + cp %= 10000; + buffer[2] = '0' + cp / 1000; + cp %= 1000; + buffer[3] = '0' + cp / 100; + buffer[4] = ' '; + } + else if (cp >= 1000) + { + buffer[1] = '0' + cp / 1000; + cp %= 1000; + buffer[2] = '0' + cp / 100; + cp %= 100; + buffer[3] = '.'; + buffer[4] = '0' + cp / 10; + } + else + { + buffer[1] = '0' + cp / 100; + cp %= 100; + buffer[2] = '.'; + buffer[3] = '0' + cp / 10; + cp %= 10; + buffer[4] = '0' + cp / 1; + } +} + + +// Converts a Value into pawns, always keeping two decimals +void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& pos) { + + const double pawns = std::abs(0.01 * UCIEngine::to_cp(v, pos)); + + stream << (v < 0 ? '-' + : v > 0 ? '+' + : ' ') + << std::setiosflags(std::ios::fixed) << std::setw(6) << std::setprecision(2) << pawns; +} +} + + +// Returns a string with the value of each piece on a board, +// and a table for (PSQT, Layers) values bucket by bucket. +std::string +trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::AccumulatorCaches& caches) { + + std::stringstream ss; + + char board[3 * 8 + 1][8 * 8 + 2]; + std::memset(board, ' ', sizeof(board)); + for (int row = 0; row < 3 * 8 + 1; ++row) + board[row][8 * 8 + 1] = '\0'; + + // A lambda to output one box of the board + auto writeSquare = [&board, &pos](File file, Rank rank, Piece pc, Value value) { + const int x = int(file) * 8; + const int y = (7 - int(rank)) * 3; + for (int i = 1; i < 8; ++i) + board[y][x + i] = board[y + 3][x + i] = '-'; + for (int i = 1; i < 3; ++i) + board[y + i][x] = board[y + i][x + 8] = '|'; + board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+'; + if (pc != NO_PIECE) + board[y + 1][x + 4] = PieceToChar[pc]; + if (value != VALUE_NONE) + format_cp_compact(value, &board[y + 2][x + 2], pos); + }; + + // We estimate the value of each piece by doing a differential evaluation from + // the current base eval, simulating the removal of the piece from its square. + Value base = networks.big.evaluate(pos, &caches.big); + base = pos.side_to_move() == WHITE ? base : -base; + + for (File f = FILE_A; f <= FILE_H; ++f) + for (Rank r = RANK_1; r <= RANK_8; ++r) + { + Square sq = make_square(f, r); + Piece pc = pos.piece_on(sq); + Value v = VALUE_NONE; + + if (pc != NO_PIECE && type_of(pc) != KING) + { + auto st = pos.state(); + + pos.remove_piece(sq); + st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false; + + Value eval = networks.big.evaluate(pos, &caches.big); + eval = pos.side_to_move() == WHITE ? eval : -eval; + v = base - eval; + + pos.put_piece(pc, sq); + st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false; + } + + writeSquare(f, r, pc, v); + } + + ss << " NNUE derived piece values:\n"; + for (int row = 0; row < 3 * 8 + 1; ++row) + ss << board[row] << '\n'; + ss << '\n'; + + auto t = networks.big.trace_evaluate(pos, &caches.big); + + ss << " NNUE network contributions " + << (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl + << "+------------+------------+------------+------------+\n" + << "| Bucket | Material | Positional | Total |\n" + << "| | (PSQT) | (Layers) | |\n" + << "+------------+------------+------------+------------+\n"; + + for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) + { + ss << "| " << bucket << " "; + ss << " | "; + format_cp_aligned_dot(t.psqt[bucket], ss, pos); + ss << " " + << " | "; + format_cp_aligned_dot(t.positional[bucket], ss, pos); + ss << " " + << " | "; + format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos); + ss << " " + << " |"; + if (bucket == t.correctBucket) + ss << " <-- this bucket is used"; + ss << '\n'; + } + + ss << "+------------+------------+------------+------------+\n"; + + return ss.str(); +} + + +} // namespace Stockfish::Eval::NNUE diff --git a/src/nnue/nnue_misc.h b/src/nnue/nnue_misc.h new file mode 100644 index 00000000..27a93f88 --- /dev/null +++ b/src/nnue/nnue_misc.h @@ -0,0 +1,64 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef NNUE_MISC_H_INCLUDED +#define NNUE_MISC_H_INCLUDED + +#include +#include + +#include "../types.h" +#include "nnue_architecture.h" + +namespace Stockfish { + +class Position; + +namespace Eval::NNUE { + +struct EvalFile { + // Default net name, will use one of the EvalFileDefaultName* macros defined + // in evaluate.h + std::string defaultName; + // Selected net name, either via uci option or default + std::string current; + // Net description extracted from the net file + std::string netDescription; +}; + + +struct NnueEvalTrace { + static_assert(LayerStacks == PSQTBuckets); + + Value psqt[LayerStacks]; + Value positional[LayerStacks]; + std::size_t correctBucket; +}; + +struct Networks; +struct AccumulatorCaches; + +std::string trace(Position& pos, const Networks& networks, AccumulatorCaches& caches); +void hint_common_parent_position(const Position& pos, + const Networks& networks, + AccumulatorCaches& caches); + +} // namespace Stockfish::Eval::NNUE +} // namespace Stockfish + +#endif // #ifndef NNUE_MISC_H_INCLUDED diff --git a/src/pawns.cpp b/src/pawns.cpp deleted file mode 100644 index 6e509133..00000000 --- a/src/pawns.cpp +++ /dev/null @@ -1,305 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include -#include - -#include "bitboard.h" -#include "pawns.h" -#include "position.h" -#include "thread.h" - -namespace Stockfish { - -namespace { - - #define V Value - #define S(mg, eg) make_score(mg, eg) - - // Pawn penalties - constexpr Score Backward = S( 9, 22); - constexpr Score Doubled = S(13, 51); - constexpr Score DoubledEarly = S(20, 7); - constexpr Score Isolated = S( 3, 15); - constexpr Score WeakLever = S( 4, 58); - constexpr Score WeakUnopposed = S(13, 24); - - // Bonus for blocked pawns at 5th or 6th rank - constexpr Score BlockedPawn[2] = { S(-17, -6), S(-9, 2) }; - - constexpr Score BlockedStorm[RANK_NB] = { - S(0, 0), S(0, 0), S(75, 78), S(-8, 16), S(-6, 10), S(-6, 6), S(0, 2) - }; - - // Connected pawn bonus - constexpr int Connected[RANK_NB] = { 0, 5, 7, 11, 23, 48, 87 }; - - // Strength of pawn shelter for our king by [distance from edge][rank]. - // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king. - constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = { - { V( -5), V( 82), V( 92), V( 54), V( 36), V( 22), V( 28) }, - { V(-44), V( 63), V( 33), V(-50), V(-30), V(-12), V( -62) }, - { V(-11), V( 77), V( 22), V( -6), V( 31), V( 8), V( -45) }, - { V(-39), V(-12), V(-29), V(-50), V(-43), V(-68), V(-164) } - }; - - // Danger of enemy pawns moving toward our king by [distance from edge][rank]. - // RANK_1 = 0 is used for files where the enemy has no pawn, or their pawn - // is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn - // on edge, likely blocked by our king. - constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = { - { V( 87), V(-288), V(-168), V( 96), V( 47), V( 44), V( 46) }, - { V( 42), V( -25), V( 120), V( 45), V( 34), V( -9), V( 24) }, - { V( -8), V( 51), V( 167), V( 35), V( -4), V(-16), V(-12) }, - { V(-17), V( -13), V( 100), V( 4), V( 9), V(-16), V(-31) } - }; - - - // KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties - // for king when the king is on a semi-open or open file. - constexpr Score KingOnFile[2][2] = {{ S(-21,10), S(-7, 1) }, - { S( 0,-3), S( 9,-4) }}; - - #undef S - #undef V - - - /// evaluate() calculates a score for the static pawn structure of the given position. - /// We cannot use the location of pieces or king in this function, as the evaluation - /// of the pawn structure will be stored in a small cache for speed reasons, and will - /// be re-used even when the pieces have moved. - - template - Score evaluate(const Position& pos, Pawns::Entry* e) { - - constexpr Color Them = ~Us; - constexpr Direction Up = pawn_push(Us); - constexpr Direction Down = -Up; - - Bitboard neighbours, stoppers, support, phalanx, opposed; - Bitboard lever, leverPush, blocked; - Square s; - bool backward, passed, doubled; - Score score = SCORE_ZERO; - Bitboard b = pos.pieces(Us, PAWN); - - Bitboard ourPawns = pos.pieces( Us, PAWN); - Bitboard theirPawns = pos.pieces(Them, PAWN); - - Bitboard doubleAttackThem = pawn_double_attacks_bb(theirPawns); - - e->passedPawns[Us] = 0; - e->kingSquares[Us] = SQ_NONE; - e->pawnAttacks[Us] = e->pawnAttacksSpan[Us] = pawn_attacks_bb(ourPawns); - e->blockedCount += popcount(shift(ourPawns) & (theirPawns | doubleAttackThem)); - - // Loop through all pawns of the current color and score each pawn - while (b) - { - s = pop_lsb(b); - - assert(pos.piece_on(s) == make_piece(Us, PAWN)); - - Rank r = relative_rank(Us, s); - - // Flag the pawn - opposed = theirPawns & forward_file_bb(Us, s); - blocked = theirPawns & (s + Up); - stoppers = theirPawns & passed_pawn_span(Us, s); - lever = theirPawns & pawn_attacks_bb(Us, s); - leverPush = theirPawns & pawn_attacks_bb(Us, s + Up); - doubled = ourPawns & (s - Up); - neighbours = ourPawns & adjacent_files_bb(s); - phalanx = neighbours & rank_bb(s); - support = neighbours & rank_bb(s - Up); - - if (doubled) - { - // Additional doubled penalty if none of their pawns is fixed - if (!(ourPawns & shift(theirPawns | pawn_attacks_bb(theirPawns)))) - score -= DoubledEarly; - } - - // A pawn is backward when it is behind all pawns of the same color on - // the adjacent files and cannot safely advance. - backward = !(neighbours & forward_ranks_bb(Them, s + Up)) - && (leverPush | blocked); - - // Compute additional span if pawn is not backward nor blocked - if (!backward && !blocked) - e->pawnAttacksSpan[Us] |= pawn_attack_span(Us, s); - - // A pawn is passed if one of the three following conditions is true: - // (a) there is no stoppers except some levers - // (b) the only stoppers are the leverPush, but we outnumber them - // (c) there is only one front stopper which can be levered. - // (Refined in Evaluation::passed) - passed = !(stoppers ^ lever) - || ( !(stoppers ^ leverPush) - && popcount(phalanx) >= popcount(leverPush)) - || ( stoppers == blocked && r >= RANK_5 - && (shift(support) & ~(theirPawns | doubleAttackThem))); - - passed &= !(forward_file_bb(Us, s) & ourPawns); - - // Passed pawns will be properly scored later in evaluation when we have - // full attack info. - if (passed) - e->passedPawns[Us] |= s; - - // Score this pawn - if (support | phalanx) - { - int v = Connected[r] * (2 + bool(phalanx) - bool(opposed)) - + 22 * popcount(support); - - score += make_score(v, v * (r - 2) / 4); - } - - else if (!neighbours) - { - if ( opposed - && (ourPawns & forward_file_bb(Them, s)) - && !(theirPawns & adjacent_files_bb(s))) - score -= Doubled; - else - score -= Isolated - + WeakUnopposed * !opposed; - } - - else if (backward) - score -= Backward - + WeakUnopposed * !opposed * bool(~(FileABB | FileHBB) & s); - - if (!support) - score -= Doubled * doubled - + WeakLever * more_than_one(lever); - - if (blocked && r >= RANK_5) - score += BlockedPawn[r - RANK_5]; - } - - return score; - } - -} // namespace - -namespace Pawns { - - -/// Pawns::probe() looks up the current position's pawns configuration in -/// the pawns hash table. It returns a pointer to the Entry if the position -/// is found. Otherwise a new Entry is computed and stored there, so we don't -/// have to recompute all when the same pawns configuration occurs again. - -Entry* probe(const Position& pos) { - - Key key = pos.pawn_key(); - Entry* e = pos.this_thread()->pawnsTable[key]; - - if (e->key == key) - return e; - - e->key = key; - e->blockedCount = 0; - e->scores[WHITE] = evaluate(pos, e); - e->scores[BLACK] = evaluate(pos, e); - - return e; -} - - -/// Entry::evaluate_shelter() calculates the shelter bonus and the storm -/// penalty for a king, looking at the king file and the two closest files. - -template -Score Entry::evaluate_shelter(const Position& pos, Square ksq) const { - - constexpr Color Them = ~Us; - - Bitboard b = pos.pieces(PAWN) & ~forward_ranks_bb(Them, ksq); - Bitboard ourPawns = b & pos.pieces(Us) & ~pawnAttacks[Them]; - Bitboard theirPawns = b & pos.pieces(Them); - - Score bonus = make_score(5, 5); - - File center = std::clamp(file_of(ksq), FILE_B, FILE_G); - for (File f = File(center - 1); f <= File(center + 1); ++f) - { - b = ourPawns & file_bb(f); - int ourRank = b ? relative_rank(Us, frontmost_sq(Them, b)) : 0; - - b = theirPawns & file_bb(f); - int theirRank = b ? relative_rank(Us, frontmost_sq(Them, b)) : 0; - - int d = edge_distance(f); - bonus += make_score(ShelterStrength[d][ourRank], 0); - - if (ourRank && (ourRank == theirRank - 1)) - bonus -= BlockedStorm[theirRank]; - else - bonus -= make_score(UnblockedStorm[d][theirRank], 0); - } - - // King On File - bonus -= KingOnFile[pos.is_on_semiopen_file(Us, ksq)][pos.is_on_semiopen_file(Them, ksq)]; - - return bonus; -} - - -/// Entry::do_king_safety() calculates a bonus for king safety. It is called only -/// when king square changes, which is about 20% of total king_safety() calls. - -template -Score Entry::do_king_safety(const Position& pos) { - - Square ksq = pos.square(Us); - kingSquares[Us] = ksq; - castlingRights[Us] = pos.castling_rights(Us); - auto compare = [](Score a, Score b) { return mg_value(a) < mg_value(b); }; - - Score shelter = evaluate_shelter(pos, ksq); - - // If we can castle use the bonus after castling if it is bigger - - if (pos.can_castle(Us & KING_SIDE)) - shelter = std::max(shelter, evaluate_shelter(pos, relative_square(Us, SQ_G1)), compare); - - if (pos.can_castle(Us & QUEEN_SIDE)) - shelter = std::max(shelter, evaluate_shelter(pos, relative_square(Us, SQ_C1)), compare); - - // In endgame we like to bring our king near our closest pawn - Bitboard pawns = pos.pieces(Us, PAWN); - int minPawnDist = 6; - - if (pawns & attacks_bb(ksq)) - minPawnDist = 1; - else while (pawns) - minPawnDist = std::min(minPawnDist, distance(ksq, pop_lsb(pawns))); - - return shelter - make_score(0, 16 * minPawnDist); -} - -// Explicit template instantiation -template Score Entry::do_king_safety(const Position& pos); -template Score Entry::do_king_safety(const Position& pos); - -} // namespace Pawns - -} // namespace Stockfish diff --git a/src/pawns.h b/src/pawns.h deleted file mode 100644 index af0370fc..00000000 --- a/src/pawns.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef PAWNS_H_INCLUDED -#define PAWNS_H_INCLUDED - -#include "misc.h" -#include "position.h" -#include "types.h" - -namespace Stockfish::Pawns { - -/// Pawns::Entry contains various information about a pawn structure. A lookup -/// to the pawn hash table (performed by calling the probe function) returns a -/// pointer to an Entry object. - -struct Entry { - - Score pawn_score(Color c) const { return scores[c]; } - Bitboard pawn_attacks(Color c) const { return pawnAttacks[c]; } - Bitboard passed_pawns(Color c) const { return passedPawns[c]; } - Bitboard pawn_attacks_span(Color c) const { return pawnAttacksSpan[c]; } - int passed_count() const { return popcount(passedPawns[WHITE] | passedPawns[BLACK]); } - int blocked_count() const { return blockedCount; } - - template - Score king_safety(const Position& pos) { - return kingSquares[Us] == pos.square(Us) && castlingRights[Us] == pos.castling_rights(Us) - ? kingSafety[Us] : (kingSafety[Us] = do_king_safety(pos)); - } - - template - Score do_king_safety(const Position& pos); - - template - Score evaluate_shelter(const Position& pos, Square ksq) const; - - Key key; - Score scores[COLOR_NB]; - Bitboard passedPawns[COLOR_NB]; - Bitboard pawnAttacks[COLOR_NB]; - Bitboard pawnAttacksSpan[COLOR_NB]; - Square kingSquares[COLOR_NB]; - Score kingSafety[COLOR_NB]; - int castlingRights[COLOR_NB]; - int blockedCount; -}; - -typedef HashTable Table; - -Entry* probe(const Position& pos); - -} // namespace Stockfish::Pawns - -#endif // #ifndef PAWNS_H_INCLUDED diff --git a/src/perft.h b/src/perft.h new file mode 100644 index 00000000..e907742d --- /dev/null +++ b/src/perft.h @@ -0,0 +1,68 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef PERFT_H_INCLUDED +#define PERFT_H_INCLUDED + +#include + +#include "movegen.h" +#include "position.h" +#include "types.h" +#include "uci.h" + +namespace Stockfish::Benchmark { + +// Utility to verify move generation. All the leaf nodes up +// to the given depth are generated and counted, and the sum is returned. +template +uint64_t perft(Position& pos, Depth depth) { + + StateInfo st; + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); + + uint64_t cnt, nodes = 0; + const bool leaf = (depth == 2); + + for (const auto& m : MoveList(pos)) + { + if (Root && depth <= 1) + cnt = 1, nodes++; + else + { + pos.do_move(m, st); + cnt = leaf ? MoveList(pos).size() : perft(pos, depth - 1); + nodes += cnt; + pos.undo_move(m); + } + if (Root) + sync_cout << UCIEngine::move(m, pos.is_chess960()) << ": " << cnt << sync_endl; + } + return nodes; +} + +inline uint64_t perft(const std::string& fen, Depth depth, bool isChess960) { + StateListPtr states(new std::deque(1)); + Position p; + p.set(fen, isChess960, &states->back()); + + return perft(p, depth); +} +} + +#endif // PERFT_H_INCLUDED diff --git a/src/position.cpp b/src/position.cpp index ec9229ea..b46ba029 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,21 +16,28 @@ along with this program. If not, see . */ +#include "position.h" + #include +#include #include -#include // For offsetof() -#include // For std::memset, std::memcmp +#include +#include +#include +#include #include +#include #include +#include +#include #include "bitboard.h" #include "misc.h" #include "movegen.h" -#include "position.h" -#include "thread.h" +#include "nnue/nnue_common.h" +#include "syzygy/tbprobe.h" #include "tt.h" #include "uci.h" -#include "syzygy/tbprobe.h" using std::string; @@ -38,126 +45,121 @@ namespace Stockfish { namespace Zobrist { - Key psq[PIECE_NB][SQUARE_NB]; - Key enpassant[FILE_NB]; - Key castling[CASTLING_RIGHT_NB]; - Key side, noPawns; +Key psq[PIECE_NB][SQUARE_NB]; +Key enpassant[FILE_NB]; +Key castling[CASTLING_RIGHT_NB]; +Key side, noPawns; } namespace { -const string PieceToChar(" PNBRQK pnbrqk"); +constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); -constexpr Piece Pieces[] = { W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, - B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING }; -} // namespace +constexpr Piece Pieces[] = {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, + B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING}; +} // namespace -/// operator<<(Position) returns an ASCII representation of the position - +// Returns an ASCII representation of the position std::ostream& operator<<(std::ostream& os, const Position& pos) { - os << "\n +---+---+---+---+---+---+---+---+\n"; + os << "\n +---+---+---+---+---+---+---+---+\n"; - for (Rank r = RANK_8; r >= RANK_1; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - os << " | " << PieceToChar[pos.piece_on(make_square(f, r))]; + for (Rank r = RANK_8; r >= RANK_1; --r) + { + for (File f = FILE_A; f <= FILE_H; ++f) + os << " | " << PieceToChar[pos.piece_on(make_square(f, r))]; - os << " | " << (1 + r) << "\n +---+---+---+---+---+---+---+---+\n"; - } + os << " | " << (1 + r) << "\n +---+---+---+---+---+---+---+---+\n"; + } - os << " a b c d e f g h\n" - << "\nFen: " << pos.fen() << "\nKey: " << std::hex << std::uppercase - << std::setfill('0') << std::setw(16) << pos.key() - << std::setfill(' ') << std::dec << "\nCheckers: "; + os << " a b c d e f g h\n" + << "\nFen: " << pos.fen() << "\nKey: " << std::hex << std::uppercase << std::setfill('0') + << std::setw(16) << pos.key() << std::setfill(' ') << std::dec << "\nCheckers: "; - for (Bitboard b = pos.checkers(); b; ) - os << UCI::square(pop_lsb(b)) << " "; + for (Bitboard b = pos.checkers(); b;) + os << UCIEngine::square(pop_lsb(b)) << " "; - if ( int(Tablebases::MaxCardinality) >= popcount(pos.pieces()) - && !pos.can_castle(ANY_CASTLING)) - { - StateInfo st; - ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); + if (int(Tablebases::MaxCardinality) >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) + { + StateInfo st; + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); - Position p; - p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread()); - Tablebases::ProbeState s1, s2; - Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1); - int dtz = Tablebases::probe_dtz(p, &s2); - os << "\nTablebases WDL: " << std::setw(4) << wdl << " (" << s1 << ")" - << "\nTablebases DTZ: " << std::setw(4) << dtz << " (" << s2 << ")"; - } + Position p; + p.set(pos.fen(), pos.is_chess960(), &st); + Tablebases::ProbeState s1, s2; + Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1); + int dtz = Tablebases::probe_dtz(p, &s2); + os << "\nTablebases WDL: " << std::setw(4) << wdl << " (" << s1 << ")" + << "\nTablebases DTZ: " << std::setw(4) << dtz << " (" << s2 << ")"; + } - return os; + return os; } -// Marcel van Kervinck's cuckoo algorithm for fast detection of "upcoming repetition" -// situations. Description of the algorithm in the following paper: -// https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf +// Implements Marcel van Kervinck's cuckoo algorithm to detect repetition of positions +// for 3-fold repetition draws. The algorithm uses two hash tables with Zobrist hashes +// to allow fast detection of recurring positions. For details see: +// http://web.archive.org/web/20201107002606/https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf // First and second hash functions for indexing the cuckoo tables inline int H1(Key h) { return h & 0x1fff; } inline int H2(Key h) { return (h >> 16) & 0x1fff; } // Cuckoo tables with Zobrist hashes of valid reversible moves, and the moves themselves -Key cuckoo[8192]; -Move cuckooMove[8192]; - - -/// Position::init() initializes at startup the various arrays used to compute hash keys +std::array cuckoo; +std::array cuckooMove; +// Initializes at startup the various arrays used to compute hash keys void Position::init() { - PRNG rng(1070372); + PRNG rng(1070372); - for (Piece pc : Pieces) - for (Square s = SQ_A1; s <= SQ_H8; ++s) - Zobrist::psq[pc][s] = rng.rand(); + for (Piece pc : Pieces) + for (Square s = SQ_A1; s <= SQ_H8; ++s) + Zobrist::psq[pc][s] = rng.rand(); - for (File f = FILE_A; f <= FILE_H; ++f) - Zobrist::enpassant[f] = rng.rand(); + for (File f = FILE_A; f <= FILE_H; ++f) + Zobrist::enpassant[f] = rng.rand(); - for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr) - Zobrist::castling[cr] = rng.rand(); + for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr) + Zobrist::castling[cr] = rng.rand(); - Zobrist::side = rng.rand(); - Zobrist::noPawns = rng.rand(); + Zobrist::side = rng.rand(); + Zobrist::noPawns = rng.rand(); - // Prepare the cuckoo tables - std::memset(cuckoo, 0, sizeof(cuckoo)); - std::memset(cuckooMove, 0, sizeof(cuckooMove)); - int count = 0; - for (Piece pc : Pieces) - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - for (Square s2 = Square(s1 + 1); s2 <= SQ_H8; ++s2) - if ((type_of(pc) != PAWN) && (attacks_bb(type_of(pc), s1, 0) & s2)) - { - Move move = make_move(s1, s2); - Key key = Zobrist::psq[pc][s1] ^ Zobrist::psq[pc][s2] ^ Zobrist::side; - int i = H1(key); - while (true) - { - std::swap(cuckoo[i], key); - std::swap(cuckooMove[i], move); - if (move == MOVE_NONE) // Arrived at empty slot? - break; - i = (i == H1(key)) ? H2(key) : H1(key); // Push victim to alternative slot - } - count++; - } - assert(count == 3668); + // Prepare the cuckoo tables + cuckoo.fill(0); + cuckooMove.fill(Move::none()); + [[maybe_unused]] int count = 0; + for (Piece pc : Pieces) + for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) + for (Square s2 = Square(s1 + 1); s2 <= SQ_H8; ++s2) + if ((type_of(pc) != PAWN) && (attacks_bb(type_of(pc), s1, 0) & s2)) + { + Move move = Move(s1, s2); + Key key = Zobrist::psq[pc][s1] ^ Zobrist::psq[pc][s2] ^ Zobrist::side; + int i = H1(key); + while (true) + { + std::swap(cuckoo[i], key); + std::swap(cuckooMove[i], move); + if (move == Move::none()) // Arrived at empty slot? + break; + i = (i == H1(key)) ? H2(key) : H1(key); // Push victim to alternative slot + } + count++; + } + assert(count == 3668); } -/// Position::set() initializes the position object with the given FEN string. -/// This function is not very robust - make sure that input FENs are correct, -/// this is assumed to be the responsibility of the GUI. - -Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Thread* th) { -/* +// Initializes the position object with the given FEN string. +// This function is not very robust - make sure that input FENs are correct, +// this is assumed to be the responsibility of the GUI. +Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si) { + /* A FEN string defines a particular position using only the ASCII character set. A FEN string contains six fields separated by a space. The fields are: @@ -180,9 +182,9 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th 4) En passant target square (in algebraic notation). If there's no en passant target square, this is "-". If a pawn has just made a 2-square move, this - is the position "behind" the pawn. Following X-FEN standard, this is recorded only - if there is a pawn in position to make an en passant capture, and if there really - is a pawn that might have advanced two squares. + is the position "behind" the pawn. Following X-FEN standard, this is recorded + only if there is a pawn in position to make an en passant capture, and if + there really is a pawn that might have advanced two squares. 5) Halfmove clock. This is the number of halfmoves since the last pawn advance or capture. This is used to determine if a draw can be claimed under the @@ -192,998 +194,957 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th incremented after Black's move. */ - unsigned char col, row, token; - size_t idx; - Square sq = SQ_A8; - std::istringstream ss(fenStr); + unsigned char col, row, token; + size_t idx; + Square sq = SQ_A8; + std::istringstream ss(fenStr); - std::memset(this, 0, sizeof(Position)); - std::memset(si, 0, sizeof(StateInfo)); - st = si; + std::memset(this, 0, sizeof(Position)); + std::memset(si, 0, sizeof(StateInfo)); + st = si; - ss >> std::noskipws; + ss >> std::noskipws; - // 1. Piece placement - while ((ss >> token) && !isspace(token)) - { - if (isdigit(token)) - sq += (token - '0') * EAST; // Advance the given number of files + // 1. Piece placement + while ((ss >> token) && !isspace(token)) + { + if (isdigit(token)) + sq += (token - '0') * EAST; // Advance the given number of files - else if (token == '/') - sq += 2 * SOUTH; + else if (token == '/') + sq += 2 * SOUTH; - else if ((idx = PieceToChar.find(token)) != string::npos) { - put_piece(Piece(idx), sq); - ++sq; - } - } + else if ((idx = PieceToChar.find(token)) != string::npos) + { + put_piece(Piece(idx), sq); + ++sq; + } + } - // 2. Active color - ss >> token; - sideToMove = (token == 'w' ? WHITE : BLACK); - ss >> token; + // 2. Active color + ss >> token; + sideToMove = (token == 'w' ? WHITE : BLACK); + ss >> token; - // 3. Castling availability. Compatible with 3 standards: Normal FEN standard, - // Shredder-FEN that uses the letters of the columns on which the rooks began - // the game instead of KQkq and also X-FEN standard that, in case of Chess960, - // if an inner rook is associated with the castling right, the castling tag is - // replaced by the file letter of the involved rook, as for the Shredder-FEN. - while ((ss >> token) && !isspace(token)) - { - Square rsq; - Color c = islower(token) ? BLACK : WHITE; - Piece rook = make_piece(c, ROOK); + // 3. Castling availability. Compatible with 3 standards: Normal FEN standard, + // Shredder-FEN that uses the letters of the columns on which the rooks began + // the game instead of KQkq and also X-FEN standard that, in case of Chess960, + // if an inner rook is associated with the castling right, the castling tag is + // replaced by the file letter of the involved rook, as for the Shredder-FEN. + while ((ss >> token) && !isspace(token)) + { + Square rsq; + Color c = islower(token) ? BLACK : WHITE; + Piece rook = make_piece(c, ROOK); - token = char(toupper(token)); + token = char(toupper(token)); - if (token == 'K') - for (rsq = relative_square(c, SQ_H1); piece_on(rsq) != rook; --rsq) {} + if (token == 'K') + for (rsq = relative_square(c, SQ_H1); piece_on(rsq) != rook; --rsq) + {} - else if (token == 'Q') - for (rsq = relative_square(c, SQ_A1); piece_on(rsq) != rook; ++rsq) {} + else if (token == 'Q') + for (rsq = relative_square(c, SQ_A1); piece_on(rsq) != rook; ++rsq) + {} - else if (token >= 'A' && token <= 'H') - rsq = make_square(File(token - 'A'), relative_rank(c, RANK_1)); + else if (token >= 'A' && token <= 'H') + rsq = make_square(File(token - 'A'), relative_rank(c, RANK_1)); - else - continue; + else + continue; - set_castling_right(c, rsq); - } + set_castling_right(c, rsq); + } - // 4. En passant square. - // Ignore if square is invalid or not on side to move relative rank 6. - bool enpassant = false; + // 4. En passant square. + // Ignore if square is invalid or not on side to move relative rank 6. + bool enpassant = false; - if ( ((ss >> col) && (col >= 'a' && col <= 'h')) - && ((ss >> row) && (row == (sideToMove == WHITE ? '6' : '3')))) - { - st->epSquare = make_square(File(col - 'a'), Rank(row - '1')); + if (((ss >> col) && (col >= 'a' && col <= 'h')) + && ((ss >> row) && (row == (sideToMove == WHITE ? '6' : '3')))) + { + st->epSquare = make_square(File(col - 'a'), Rank(row - '1')); - // En passant square will be considered only if - // a) side to move have a pawn threatening epSquare - // b) there is an enemy pawn in front of epSquare - // c) there is no piece on epSquare or behind epSquare - enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN) - && (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))) - && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))); - } + // En passant square will be considered only if + // a) side to move have a pawn threatening epSquare + // b) there is an enemy pawn in front of epSquare + // c) there is no piece on epSquare or behind epSquare + enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN) + && (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))) + && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))); + } - if (!enpassant) - st->epSquare = SQ_NONE; + if (!enpassant) + st->epSquare = SQ_NONE; - // 5-6. Halfmove clock and fullmove number - ss >> std::skipws >> st->rule50 >> gamePly; + // 5-6. Halfmove clock and fullmove number + ss >> std::skipws >> st->rule50 >> gamePly; - // Convert from fullmove starting from 1 to gamePly starting from 0, - // handle also common incorrect FEN with fullmove = 0. - gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK); + // Convert from fullmove starting from 1 to gamePly starting from 0, + // handle also common incorrect FEN with fullmove = 0. + gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK); - chess960 = isChess960; - thisThread = th; - set_state(st); + chess960 = isChess960; + set_state(); - assert(pos_is_ok()); + assert(pos_is_ok()); - return *this; + return *this; } -/// Position::set_castling_right() is a helper function used to set castling -/// rights given the corresponding color and the rook starting square. - +// Helper function used to set castling +// rights given the corresponding color and the rook starting square. void Position::set_castling_right(Color c, Square rfrom) { - Square kfrom = square(c); - CastlingRights cr = c & (kfrom < rfrom ? KING_SIDE: QUEEN_SIDE); + Square kfrom = square(c); + CastlingRights cr = c & (kfrom < rfrom ? KING_SIDE : QUEEN_SIDE); - st->castlingRights |= cr; - castlingRightsMask[kfrom] |= cr; - castlingRightsMask[rfrom] |= cr; - castlingRookSquare[cr] = rfrom; + st->castlingRights |= cr; + castlingRightsMask[kfrom] |= cr; + castlingRightsMask[rfrom] |= cr; + castlingRookSquare[cr] = rfrom; - Square kto = relative_square(c, cr & KING_SIDE ? SQ_G1 : SQ_C1); - Square rto = relative_square(c, cr & KING_SIDE ? SQ_F1 : SQ_D1); + Square kto = relative_square(c, cr & KING_SIDE ? SQ_G1 : SQ_C1); + Square rto = relative_square(c, cr & KING_SIDE ? SQ_F1 : SQ_D1); - castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto)) - & ~(kfrom | rfrom); + castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto)) & ~(kfrom | rfrom); } -/// Position::set_check_info() sets king attacks to detect if a move gives check +// Sets king attacks to detect if a move gives check +void Position::set_check_info() const { -void Position::set_check_info(StateInfo* si) const { + update_slider_blockers(WHITE); + update_slider_blockers(BLACK); - si->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square(WHITE), si->pinners[BLACK]); - si->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square(BLACK), si->pinners[WHITE]); + Square ksq = square(~sideToMove); - Square ksq = square(~sideToMove); - - si->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq); - si->checkSquares[KNIGHT] = attacks_bb(ksq); - si->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); - si->checkSquares[ROOK] = attacks_bb(ksq, pieces()); - si->checkSquares[QUEEN] = si->checkSquares[BISHOP] | si->checkSquares[ROOK]; - si->checkSquares[KING] = 0; + st->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq); + st->checkSquares[KNIGHT] = attacks_bb(ksq); + st->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); + st->checkSquares[ROOK] = attacks_bb(ksq, pieces()); + st->checkSquares[QUEEN] = st->checkSquares[BISHOP] | st->checkSquares[ROOK]; + st->checkSquares[KING] = 0; } -/// Position::set_state() computes the hash keys of the position, and other -/// data that once computed is updated incrementally as moves are made. -/// The function is only used when a new position is set up, and to verify -/// the correctness of the StateInfo data when running in debug mode. +// Computes the hash keys of the position, and other +// data that once computed is updated incrementally as moves are made. +// The function is only used when a new position is set up +void Position::set_state() const { -void Position::set_state(StateInfo* si) const { + st->key = st->materialKey = 0; + st->pawnKey = Zobrist::noPawns; + st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO; + st->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); - si->key = si->materialKey = 0; - si->pawnKey = Zobrist::noPawns; - si->nonPawnMaterial[WHITE] = si->nonPawnMaterial[BLACK] = VALUE_ZERO; - si->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); + set_check_info(); - set_check_info(si); + for (Bitboard b = pieces(); b;) + { + Square s = pop_lsb(b); + Piece pc = piece_on(s); + st->key ^= Zobrist::psq[pc][s]; - for (Bitboard b = pieces(); b; ) - { - Square s = pop_lsb(b); - Piece pc = piece_on(s); - si->key ^= Zobrist::psq[pc][s]; + if (type_of(pc) == PAWN) + st->pawnKey ^= Zobrist::psq[pc][s]; - if (type_of(pc) == PAWN) - si->pawnKey ^= Zobrist::psq[pc][s]; + else if (type_of(pc) != KING) + st->nonPawnMaterial[color_of(pc)] += PieceValue[pc]; + } - else if (type_of(pc) != KING) - si->nonPawnMaterial[color_of(pc)] += PieceValue[MG][pc]; - } + if (st->epSquare != SQ_NONE) + st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; - if (si->epSquare != SQ_NONE) - si->key ^= Zobrist::enpassant[file_of(si->epSquare)]; + if (sideToMove == BLACK) + st->key ^= Zobrist::side; - if (sideToMove == BLACK) - si->key ^= Zobrist::side; + st->key ^= Zobrist::castling[st->castlingRights]; - si->key ^= Zobrist::castling[si->castlingRights]; - - for (Piece pc : Pieces) - for (int cnt = 0; cnt < pieceCount[pc]; ++cnt) - si->materialKey ^= Zobrist::psq[pc][cnt]; + for (Piece pc : Pieces) + for (int cnt = 0; cnt < pieceCount[pc]; ++cnt) + st->materialKey ^= Zobrist::psq[pc][cnt]; } -/// Position::set() is an overload to initialize the position object with -/// the given endgame code string like "KBPKN". It is mainly a helper to -/// get the material key out of an endgame code. - +// Overload to initialize the position object with the given endgame code string +// like "KBPKN". It's mainly a helper to get the material key out of an endgame code. Position& Position::set(const string& code, Color c, StateInfo* si) { - assert(code[0] == 'K'); + assert(code[0] == 'K'); - string sides[] = { code.substr(code.find('K', 1)), // Weak - code.substr(0, std::min(code.find('v'), code.find('K', 1))) }; // Strong + string sides[] = {code.substr(code.find('K', 1)), // Weak + code.substr(0, std::min(code.find('v'), code.find('K', 1)))}; // Strong - assert(sides[0].length() > 0 && sides[0].length() < 8); - assert(sides[1].length() > 0 && sides[1].length() < 8); + assert(sides[0].length() > 0 && sides[0].length() < 8); + assert(sides[1].length() > 0 && sides[1].length() < 8); - std::transform(sides[c].begin(), sides[c].end(), sides[c].begin(), tolower); + std::transform(sides[c].begin(), sides[c].end(), sides[c].begin(), tolower); - string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/" - + sides[1] + char(8 - sides[1].length() + '0') + "/8 w - - 0 10"; + string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/" + sides[1] + + char(8 - sides[1].length() + '0') + "/8 w - - 0 10"; - return set(fenStr, false, si, nullptr); + return set(fenStr, false, si); } -/// Position::fen() returns a FEN representation of the position. In case of -/// Chess960 the Shredder-FEN notation is used. This is mainly a debugging function. - +// Returns a FEN representation of the position. In case of +// Chess960 the Shredder-FEN notation is used. This is mainly a debugging function. string Position::fen() const { - int emptyCnt; - std::ostringstream ss; + int emptyCnt; + std::ostringstream ss; - for (Rank r = RANK_8; r >= RANK_1; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - { - for (emptyCnt = 0; f <= FILE_H && empty(make_square(f, r)); ++f) - ++emptyCnt; - - if (emptyCnt) - ss << emptyCnt; - - if (f <= FILE_H) - ss << PieceToChar[piece_on(make_square(f, r))]; - } - - if (r > RANK_1) - ss << '/'; - } - - ss << (sideToMove == WHITE ? " w " : " b "); - - if (can_castle(WHITE_OO)) - ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OO ))) : 'K'); - - if (can_castle(WHITE_OOO)) - ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OOO))) : 'Q'); - - if (can_castle(BLACK_OO)) - ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OO ))) : 'k'); - - if (can_castle(BLACK_OOO)) - ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OOO))) : 'q'); - - if (!can_castle(ANY_CASTLING)) - ss << '-'; - - ss << (ep_square() == SQ_NONE ? " - " : " " + UCI::square(ep_square()) + " ") - << st->rule50 << " " << 1 + (gamePly - (sideToMove == BLACK)) / 2; - - return ss.str(); -} - - -/// Position::slider_blockers() returns a bitboard of all the pieces (both colors) -/// that are blocking attacks on the square 's' from 'sliders'. A piece blocks a -/// slider if removing that piece from the board would result in a position where -/// square 's' is attacked. For example, a king-attack blocking piece can be either -/// a pinned or a discovered check piece, according if its color is the opposite -/// or the same of the color of the slider. - -Bitboard Position::slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const { - - Bitboard blockers = 0; - pinners = 0; - - // Snipers are sliders that attack 's' when a piece and other snipers are removed - Bitboard snipers = ( (attacks_bb< ROOK>(s) & pieces(QUEEN, ROOK)) - | (attacks_bb(s) & pieces(QUEEN, BISHOP))) & sliders; - Bitboard occupancy = pieces() ^ snipers; - - while (snipers) - { - Square sniperSq = pop_lsb(snipers); - Bitboard b = between_bb(s, sniperSq) & occupancy; - - if (b && !more_than_one(b)) + for (Rank r = RANK_8; r >= RANK_1; --r) { - blockers |= b; - if (b & pieces(color_of(piece_on(s)))) - pinners |= sniperSq; + for (File f = FILE_A; f <= FILE_H; ++f) + { + for (emptyCnt = 0; f <= FILE_H && empty(make_square(f, r)); ++f) + ++emptyCnt; + + if (emptyCnt) + ss << emptyCnt; + + if (f <= FILE_H) + ss << PieceToChar[piece_on(make_square(f, r))]; + } + + if (r > RANK_1) + ss << '/'; + } + + ss << (sideToMove == WHITE ? " w " : " b "); + + if (can_castle(WHITE_OO)) + ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OO))) : 'K'); + + if (can_castle(WHITE_OOO)) + ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OOO))) : 'Q'); + + if (can_castle(BLACK_OO)) + ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OO))) : 'k'); + + if (can_castle(BLACK_OOO)) + ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OOO))) : 'q'); + + if (!can_castle(ANY_CASTLING)) + ss << '-'; + + ss << (ep_square() == SQ_NONE ? " - " : " " + UCIEngine::square(ep_square()) + " ") + << st->rule50 << " " << 1 + (gamePly - (sideToMove == BLACK)) / 2; + + return ss.str(); +} + +// Calculates st->blockersForKing[c] and st->pinners[~c], +// which store respectively the pieces preventing king of color c from being in check +// and the slider pieces of color ~c pinning pieces of color c to the king. +void Position::update_slider_blockers(Color c) const { + + Square ksq = square(c); + + st->blockersForKing[c] = 0; + st->pinners[~c] = 0; + + // Snipers are sliders that attack 's' when a piece and other snipers are removed + Bitboard snipers = ((attacks_bb(ksq) & pieces(QUEEN, ROOK)) + | (attacks_bb(ksq) & pieces(QUEEN, BISHOP))) + & pieces(~c); + Bitboard occupancy = pieces() ^ snipers; + + while (snipers) + { + Square sniperSq = pop_lsb(snipers); + Bitboard b = between_bb(ksq, sniperSq) & occupancy; + + if (b && !more_than_one(b)) + { + st->blockersForKing[c] |= b; + if (b & pieces(c)) + st->pinners[~c] |= sniperSq; + } } - } - return blockers; } -/// Position::attackers_to() computes a bitboard of all pieces which attack a -/// given square. Slider attacks use the occupied bitboard to indicate occupancy. - +// Computes a bitboard of all pieces which attack a given square. +// Slider attacks use the occupied bitboard to indicate occupancy. Bitboard Position::attackers_to(Square s, Bitboard occupied) const { - return (pawn_attacks_bb(BLACK, s) & pieces(WHITE, PAWN)) - | (pawn_attacks_bb(WHITE, s) & pieces(BLACK, PAWN)) - | (attacks_bb(s) & pieces(KNIGHT)) - | (attacks_bb< ROOK>(s, occupied) & pieces( ROOK, QUEEN)) - | (attacks_bb(s, occupied) & pieces(BISHOP, QUEEN)) - | (attacks_bb(s) & pieces(KING)); + return (pawn_attacks_bb(BLACK, s) & pieces(WHITE, PAWN)) + | (pawn_attacks_bb(WHITE, s) & pieces(BLACK, PAWN)) + | (attacks_bb(s) & pieces(KNIGHT)) + | (attacks_bb(s, occupied) & pieces(ROOK, QUEEN)) + | (attacks_bb(s, occupied) & pieces(BISHOP, QUEEN)) + | (attacks_bb(s) & pieces(KING)); } -/// Position::legal() tests whether a pseudo-legal move is legal - +// Tests whether a pseudo-legal move is legal bool Position::legal(Move m) const { - assert(is_ok(m)); + assert(m.is_ok()); - Color us = sideToMove; - Square from = from_sq(m); - Square to = to_sq(m); + Color us = sideToMove; + Square from = m.from_sq(); + Square to = m.to_sq(); - assert(color_of(moved_piece(m)) == us); - assert(piece_on(square(us)) == make_piece(us, KING)); + assert(color_of(moved_piece(m)) == us); + assert(piece_on(square(us)) == make_piece(us, KING)); - // En passant captures are a tricky special case. Because they are rather - // uncommon, we do it simply by testing whether the king is attacked after - // the move is made. - if (type_of(m) == EN_PASSANT) - { - Square ksq = square(us); - Square capsq = to - pawn_push(us); - Bitboard occupied = (pieces() ^ from ^ capsq) | to; + // En passant captures are a tricky special case. Because they are rather + // uncommon, we do it simply by testing whether the king is attacked after + // the move is made. + if (m.type_of() == EN_PASSANT) + { + Square ksq = square(us); + Square capsq = to - pawn_push(us); + Bitboard occupied = (pieces() ^ from ^ capsq) | to; - assert(to == ep_square()); - assert(moved_piece(m) == make_piece(us, PAWN)); - assert(piece_on(capsq) == make_piece(~us, PAWN)); - assert(piece_on(to) == NO_PIECE); + assert(to == ep_square()); + assert(moved_piece(m) == make_piece(us, PAWN)); + assert(piece_on(capsq) == make_piece(~us, PAWN)); + assert(piece_on(to) == NO_PIECE); - return !(attacks_bb< ROOK>(ksq, occupied) & pieces(~us, QUEEN, ROOK)) + return !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, ROOK)) && !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, BISHOP)); - } + } - // Castling moves generation does not check if the castling path is clear of - // enemy attacks, it is delayed at a later time: now! - if (type_of(m) == CASTLING) - { - // After castling, the rook and king final positions are the same in - // Chess960 as they would be in standard chess. - to = relative_square(us, to > from ? SQ_G1 : SQ_C1); - Direction step = to > from ? WEST : EAST; + // Castling moves generation does not check if the castling path is clear of + // enemy attacks, it is delayed at a later time: now! + if (m.type_of() == CASTLING) + { + // After castling, the rook and king final positions are the same in + // Chess960 as they would be in standard chess. + to = relative_square(us, to > from ? SQ_G1 : SQ_C1); + Direction step = to > from ? WEST : EAST; - for (Square s = to; s != from; s += step) - if (attackers_to(s) & pieces(~us)) - return false; + for (Square s = to; s != from; s += step) + if (attackers_to(s) & pieces(~us)) + return false; - // In case of Chess960, verify if the Rook blocks some checks - // For instance an enemy queen in SQ_A1 when castling rook is in SQ_B1. - return !chess960 || !(blockers_for_king(us) & to_sq(m)); - } + // In case of Chess960, verify if the Rook blocks some checks. + // For instance an enemy queen in SQ_A1 when castling rook is in SQ_B1. + return !chess960 || !(blockers_for_king(us) & m.to_sq()); + } - // If the moving piece is a king, check whether the destination square is - // attacked by the opponent. - if (type_of(piece_on(from)) == KING) - return !(attackers_to(to, pieces() ^ from) & pieces(~us)); + // If the moving piece is a king, check whether the destination square is + // attacked by the opponent. + if (type_of(piece_on(from)) == KING) + return !(attackers_to(to, pieces() ^ from) & pieces(~us)); - // A non-king move is legal if and only if it is not pinned or it - // is moving along the ray towards or away from the king. - return !(blockers_for_king(us) & from) - || aligned(from, to, square(us)); + // A non-king move is legal if and only if it is not pinned or it + // is moving along the ray towards or away from the king. + return !(blockers_for_king(us) & from) || aligned(from, to, square(us)); } -/// Position::pseudo_legal() takes a random move and tests whether the move is -/// pseudo legal. It is used to validate moves from TT that can be corrupted -/// due to SMP concurrent access or hash position key aliasing. - +// Takes a random move and tests whether the move is +// pseudo-legal. It is used to validate moves from TT that can be corrupted +// due to SMP concurrent access or hash position key aliasing. bool Position::pseudo_legal(const Move m) const { - Color us = sideToMove; - Square from = from_sq(m); - Square to = to_sq(m); - Piece pc = moved_piece(m); + Color us = sideToMove; + Square from = m.from_sq(); + Square to = m.to_sq(); + Piece pc = moved_piece(m); - // Use a slower but simpler function for uncommon cases - // yet we skip the legality check of MoveList(). - if (type_of(m) != NORMAL) - return checkers() ? MoveList< EVASIONS>(*this).contains(m) - : MoveList(*this).contains(m); + // Use a slower but simpler function for uncommon cases + // yet we skip the legality check of MoveList(). + if (m.type_of() != NORMAL) + return checkers() ? MoveList(*this).contains(m) + : MoveList(*this).contains(m); - // Is not a promotion, so promotion piece must be empty - if (promotion_type(m) - KNIGHT != NO_PIECE_TYPE) - return false; + // Is not a promotion, so the promotion piece must be empty + assert(m.promotion_type() - KNIGHT == NO_PIECE_TYPE); - // If the 'from' square is not occupied by a piece belonging to the side to - // move, the move is obviously not legal. - if (pc == NO_PIECE || color_of(pc) != us) - return false; + // If the 'from' square is not occupied by a piece belonging to the side to + // move, the move is obviously not legal. + if (pc == NO_PIECE || color_of(pc) != us) + return false; - // The destination square cannot be occupied by a friendly piece - if (pieces(us) & to) - return false; + // The destination square cannot be occupied by a friendly piece + if (pieces(us) & to) + return false; - // Handle the special case of a pawn move - if (type_of(pc) == PAWN) - { - // We have already handled promotion moves, so destination - // cannot be on the 8th/1st rank. - if ((Rank8BB | Rank1BB) & to) - return false; + // Handle the special case of a pawn move + if (type_of(pc) == PAWN) + { + // We have already handled promotion moves, so destination cannot be on the 8th/1st rank + if ((Rank8BB | Rank1BB) & to) + return false; - if ( !(pawn_attacks_bb(us, from) & pieces(~us) & to) // Not a capture - && !((from + pawn_push(us) == to) && empty(to)) // Not a single push - && !( (from + 2 * pawn_push(us) == to) // Not a double push - && (relative_rank(us, from) == RANK_2) - && empty(to) - && empty(to - pawn_push(us)))) - return false; - } - else if (!(attacks_bb(type_of(pc), from, pieces()) & to)) - return false; + if (!(pawn_attacks_bb(us, from) & pieces(~us) & to) // Not a capture + && !((from + pawn_push(us) == to) && empty(to)) // Not a single push + && !((from + 2 * pawn_push(us) == to) // Not a double push + && (relative_rank(us, from) == RANK_2) && empty(to) && empty(to - pawn_push(us)))) + return false; + } + else if (!(attacks_bb(type_of(pc), from, pieces()) & to)) + return false; - // Evasions generator already takes care to avoid some kind of illegal moves - // and legal() relies on this. We therefore have to take care that the same - // kind of moves are filtered out here. - if (checkers()) - { - if (type_of(pc) != KING) - { - // Double check? In this case a king move is required - if (more_than_one(checkers())) - return false; + // Evasions generator already takes care to avoid some kind of illegal moves + // and legal() relies on this. We therefore have to take care that the same + // kind of moves are filtered out here. + if (checkers()) + { + if (type_of(pc) != KING) + { + // Double check? In this case, a king move is required + if (more_than_one(checkers())) + return false; - // Our move must be a blocking interposition or a capture of the checking piece - if (!(between_bb(square(us), lsb(checkers())) & to)) - return false; - } - // In case of king moves under check we have to remove king so as to catch - // invalid moves like b1a1 when opposite queen is on c1. - else if (attackers_to(to, pieces() ^ from) & pieces(~us)) - return false; - } + // Our move must be a blocking interposition or a capture of the checking piece + if (!(between_bb(square(us), lsb(checkers())) & to)) + return false; + } + // In case of king moves under check we have to remove the king so as to catch + // invalid moves like b1a1 when opposite queen is on c1. + else if (attackers_to(to, pieces() ^ from) & pieces(~us)) + return false; + } - return true; + return true; } -/// Position::gives_check() tests whether a pseudo-legal move gives a check - +// Tests whether a pseudo-legal move gives a check bool Position::gives_check(Move m) const { - assert(is_ok(m)); - assert(color_of(moved_piece(m)) == sideToMove); + assert(m.is_ok()); + assert(color_of(moved_piece(m)) == sideToMove); - Square from = from_sq(m); - Square to = to_sq(m); + Square from = m.from_sq(); + Square to = m.to_sq(); - // Is there a direct check? - if (check_squares(type_of(piece_on(from))) & to) - return true; + // Is there a direct check? + if (check_squares(type_of(piece_on(from))) & to) + return true; - // Is there a discovered check? - if ( (blockers_for_king(~sideToMove) & from) - && !aligned(from, to, square(~sideToMove))) - return true; + // Is there a discovered check? + if (blockers_for_king(~sideToMove) & from) + return !aligned(from, to, square(~sideToMove)) || m.type_of() == CASTLING; - switch (type_of(m)) - { - case NORMAL: - return false; + switch (m.type_of()) + { + case NORMAL : + return false; - case PROMOTION: - return attacks_bb(promotion_type(m), to, pieces() ^ from) & square(~sideToMove); + case PROMOTION : + return attacks_bb(m.promotion_type(), to, pieces() ^ from) & square(~sideToMove); - // En passant capture with check? We have already handled the case - // of direct checks and ordinary discovered check, so the only case we - // need to handle is the unusual case of a discovered check through - // the captured pawn. - case EN_PASSANT: - { - Square capsq = make_square(file_of(to), rank_of(from)); - Bitboard b = (pieces() ^ from ^ capsq) | to; + // En passant capture with check? We have already handled the case of direct + // checks and ordinary discovered check, so the only case we need to handle + // is the unusual case of a discovered check through the captured pawn. + case EN_PASSANT : { + Square capsq = make_square(file_of(to), rank_of(from)); + Bitboard b = (pieces() ^ from ^ capsq) | to; - return (attacks_bb< ROOK>(square(~sideToMove), b) & pieces(sideToMove, QUEEN, ROOK)) - | (attacks_bb(square(~sideToMove), b) & pieces(sideToMove, QUEEN, BISHOP)); - } - default: //CASTLING - { - // Castling is encoded as 'king captures the rook' - Square ksq = square(~sideToMove); - Square rto = relative_square(sideToMove, to > from ? SQ_F1 : SQ_D1); + return (attacks_bb(square(~sideToMove), b) & pieces(sideToMove, QUEEN, ROOK)) + | (attacks_bb(square(~sideToMove), b) + & pieces(sideToMove, QUEEN, BISHOP)); + } + default : //CASTLING + { + // Castling is encoded as 'king captures the rook' + Square rto = relative_square(sideToMove, to > from ? SQ_F1 : SQ_D1); - return (attacks_bb(rto) & ksq) - && (attacks_bb(rto, pieces() ^ from ^ to) & ksq); - } - } + return check_squares(ROOK) & rto; + } + } } -/// Position::do_move() makes a move, and saves all information necessary -/// to a StateInfo object. The move is assumed to be legal. Pseudo-legal -/// moves should be filtered out before this function is called. - +// Makes a move, and saves all information necessary +// to a StateInfo object. The move is assumed to be legal. Pseudo-legal +// moves should be filtered out before this function is called. void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { - assert(is_ok(m)); - assert(&newSt != st); + assert(m.is_ok()); + assert(&newSt != st); - thisThread->nodes.fetch_add(1, std::memory_order_relaxed); - Key k = st->key ^ Zobrist::side; + Key k = st->key ^ Zobrist::side; - // Copy some fields of the old state to our new StateInfo object except the - // ones which are going to be recalculated from scratch anyway and then switch - // our state pointer to point to the new (ready to be updated) state. - std::memcpy(&newSt, st, offsetof(StateInfo, key)); - newSt.previous = st; - st = &newSt; + // Copy some fields of the old state to our new StateInfo object except the + // ones which are going to be recalculated from scratch anyway and then switch + // our state pointer to point to the new (ready to be updated) state. + std::memcpy(&newSt, st, offsetof(StateInfo, key)); + newSt.previous = st; + st = &newSt; - // Increment ply counters. In particular, rule50 will be reset to zero later on - // in case of a capture or a pawn move. - ++gamePly; - ++st->rule50; - ++st->pliesFromNull; + // Increment ply counters. In particular, rule50 will be reset to zero later on + // in case of a capture or a pawn move. + ++gamePly; + ++st->rule50; + ++st->pliesFromNull; - // Used by NNUE - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; - auto& dp = st->dirtyPiece; - dp.dirty_num = 1; + // Used by NNUE + st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = + st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false; - Color us = sideToMove; - Color them = ~us; - Square from = from_sq(m); - Square to = to_sq(m); - Piece pc = piece_on(from); - Piece captured = type_of(m) == EN_PASSANT ? make_piece(them, PAWN) : piece_on(to); + auto& dp = st->dirtyPiece; + dp.dirty_num = 1; - assert(color_of(pc) == us); - assert(captured == NO_PIECE || color_of(captured) == (type_of(m) != CASTLING ? them : us)); - assert(type_of(captured) != KING); + Color us = sideToMove; + Color them = ~us; + Square from = m.from_sq(); + Square to = m.to_sq(); + Piece pc = piece_on(from); + Piece captured = m.type_of() == EN_PASSANT ? make_piece(them, PAWN) : piece_on(to); - if (type_of(m) == CASTLING) - { - assert(pc == make_piece(us, KING)); - assert(captured == make_piece(us, ROOK)); + assert(color_of(pc) == us); + assert(captured == NO_PIECE || color_of(captured) == (m.type_of() != CASTLING ? them : us)); + assert(type_of(captured) != KING); - Square rfrom, rto; - do_castling(us, from, to, rfrom, rto); + if (m.type_of() == CASTLING) + { + assert(pc == make_piece(us, KING)); + assert(captured == make_piece(us, ROOK)); - k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; - captured = NO_PIECE; - } + Square rfrom, rto; + do_castling(us, from, to, rfrom, rto); - if (captured) - { - Square capsq = to; + k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; + captured = NO_PIECE; + } - // If the captured piece is a pawn, update pawn hash key, otherwise - // update non-pawn material. - if (type_of(captured) == PAWN) - { - if (type_of(m) == EN_PASSANT) - { - capsq -= pawn_push(us); + if (captured) + { + Square capsq = to; - assert(pc == make_piece(us, PAWN)); - assert(to == st->epSquare); - assert(relative_rank(us, to) == RANK_6); - assert(piece_on(to) == NO_PIECE); - assert(piece_on(capsq) == make_piece(them, PAWN)); - } + // If the captured piece is a pawn, update pawn hash key, otherwise + // update non-pawn material. + if (type_of(captured) == PAWN) + { + if (m.type_of() == EN_PASSANT) + { + capsq -= pawn_push(us); - st->pawnKey ^= Zobrist::psq[captured][capsq]; - } - else - st->nonPawnMaterial[them] -= PieceValue[MG][captured]; + assert(pc == make_piece(us, PAWN)); + assert(to == st->epSquare); + assert(relative_rank(us, to) == RANK_6); + assert(piece_on(to) == NO_PIECE); + assert(piece_on(capsq) == make_piece(them, PAWN)); + } - if (Eval::useNNUE) - { - dp.dirty_num = 2; // 1 piece moved, 1 piece captured - dp.piece[1] = captured; - dp.from[1] = capsq; - dp.to[1] = SQ_NONE; - } + st->pawnKey ^= Zobrist::psq[captured][capsq]; + } + else + st->nonPawnMaterial[them] -= PieceValue[captured]; - // Update board and piece lists - remove_piece(capsq); + dp.dirty_num = 2; // 1 piece moved, 1 piece captured + dp.piece[1] = captured; + dp.from[1] = capsq; + dp.to[1] = SQ_NONE; - if (type_of(m) == EN_PASSANT) - board[capsq] = NO_PIECE; + // Update board and piece lists + remove_piece(capsq); - // Update material hash key and prefetch access to materialTable - k ^= Zobrist::psq[captured][capsq]; - st->materialKey ^= Zobrist::psq[captured][pieceCount[captured]]; - prefetch(thisThread->materialTable[st->materialKey]); + k ^= Zobrist::psq[captured][capsq]; + st->materialKey ^= Zobrist::psq[captured][pieceCount[captured]]; - // Reset rule 50 counter - st->rule50 = 0; - } + // Reset rule 50 counter + st->rule50 = 0; + } - // Update hash key - k ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + // Update hash key + k ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; - // Reset en passant square - if (st->epSquare != SQ_NONE) - { - k ^= Zobrist::enpassant[file_of(st->epSquare)]; - st->epSquare = SQ_NONE; - } + // Reset en passant square + if (st->epSquare != SQ_NONE) + { + k ^= Zobrist::enpassant[file_of(st->epSquare)]; + st->epSquare = SQ_NONE; + } - // Update castling rights if needed - if (st->castlingRights && (castlingRightsMask[from] | castlingRightsMask[to])) - { - k ^= Zobrist::castling[st->castlingRights]; - st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]); - k ^= Zobrist::castling[st->castlingRights]; - } + // Update castling rights if needed + if (st->castlingRights && (castlingRightsMask[from] | castlingRightsMask[to])) + { + k ^= Zobrist::castling[st->castlingRights]; + st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]); + k ^= Zobrist::castling[st->castlingRights]; + } - // Move the piece. The tricky Chess960 castling is handled earlier - if (type_of(m) != CASTLING) - { - if (Eval::useNNUE) - { - dp.piece[0] = pc; - dp.from[0] = from; - dp.to[0] = to; - } + // Move the piece. The tricky Chess960 castling is handled earlier + if (m.type_of() != CASTLING) + { + dp.piece[0] = pc; + dp.from[0] = from; + dp.to[0] = to; - move_piece(from, to); - } + move_piece(from, to); + } - // If the moving piece is a pawn do some special extra work - if (type_of(pc) == PAWN) - { - // Set en passant square if the moved pawn can be captured - if ( (int(to) ^ int(from)) == 16 - && (pawn_attacks_bb(us, to - pawn_push(us)) & pieces(them, PAWN))) - { - st->epSquare = to - pawn_push(us); - k ^= Zobrist::enpassant[file_of(st->epSquare)]; - } + // If the moving piece is a pawn do some special extra work + if (type_of(pc) == PAWN) + { + // Set en passant square if the moved pawn can be captured + if ((int(to) ^ int(from)) == 16 + && (pawn_attacks_bb(us, to - pawn_push(us)) & pieces(them, PAWN))) + { + st->epSquare = to - pawn_push(us); + k ^= Zobrist::enpassant[file_of(st->epSquare)]; + } - else if (type_of(m) == PROMOTION) - { - Piece promotion = make_piece(us, promotion_type(m)); + else if (m.type_of() == PROMOTION) + { + Piece promotion = make_piece(us, m.promotion_type()); - assert(relative_rank(us, to) == RANK_8); - assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN); + assert(relative_rank(us, to) == RANK_8); + assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN); - remove_piece(to); - put_piece(promotion, to); + remove_piece(to); + put_piece(promotion, to); - if (Eval::useNNUE) - { - // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE - dp.to[0] = SQ_NONE; - dp.piece[dp.dirty_num] = promotion; - dp.from[dp.dirty_num] = SQ_NONE; - dp.to[dp.dirty_num] = to; - dp.dirty_num++; - } + // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE + dp.to[0] = SQ_NONE; + dp.piece[dp.dirty_num] = promotion; + dp.from[dp.dirty_num] = SQ_NONE; + dp.to[dp.dirty_num] = to; + dp.dirty_num++; - // Update hash keys - k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to]; - st->pawnKey ^= Zobrist::psq[pc][to]; - st->materialKey ^= Zobrist::psq[promotion][pieceCount[promotion]-1] - ^ Zobrist::psq[pc][pieceCount[pc]]; + // Update hash keys + k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to]; + st->pawnKey ^= Zobrist::psq[pc][to]; + st->materialKey ^= + Zobrist::psq[promotion][pieceCount[promotion] - 1] ^ Zobrist::psq[pc][pieceCount[pc]]; - // Update material - st->nonPawnMaterial[us] += PieceValue[MG][promotion]; - } + // Update material + st->nonPawnMaterial[us] += PieceValue[promotion]; + } - // Update pawn hash key - st->pawnKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + // Update pawn hash key + st->pawnKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; - // Reset rule 50 draw counter - st->rule50 = 0; - } + // Reset rule 50 draw counter + st->rule50 = 0; + } - // Set capture piece - st->capturedPiece = captured; + // Set capture piece + st->capturedPiece = captured; - // Update the key with the final value - st->key = k; + // Update the key with the final value + st->key = k; - // Calculate checkers bitboard (if move gives check) - st->checkersBB = givesCheck ? attackers_to(square(them)) & pieces(us) : 0; + // Calculate checkers bitboard (if move gives check) + st->checkersBB = givesCheck ? attackers_to(square(them)) & pieces(us) : 0; - sideToMove = ~sideToMove; + sideToMove = ~sideToMove; - // Update king attacks used for fast check detection - set_check_info(st); + // Update king attacks used for fast check detection + set_check_info(); - // Calculate the repetition info. It is the ply distance from the previous - // occurrence of the same position, negative in the 3-fold case, or zero - // if the position was not repeated. - st->repetition = 0; - int end = std::min(st->rule50, st->pliesFromNull); - if (end >= 4) - { - StateInfo* stp = st->previous->previous; - for (int i = 4; i <= end; i += 2) - { - stp = stp->previous->previous; - if (stp->key == st->key) - { - st->repetition = stp->repetition ? -i : i; - break; - } - } - } + // Calculate the repetition info. It is the ply distance from the previous + // occurrence of the same position, negative in the 3-fold case, or zero + // if the position was not repeated. + st->repetition = 0; + int end = std::min(st->rule50, st->pliesFromNull); + if (end >= 4) + { + StateInfo* stp = st->previous->previous; + for (int i = 4; i <= end; i += 2) + { + stp = stp->previous->previous; + if (stp->key == st->key) + { + st->repetition = stp->repetition ? -i : i; + break; + } + } + } - assert(pos_is_ok()); + assert(pos_is_ok()); } -/// Position::undo_move() unmakes a move. When it returns, the position should -/// be restored to exactly the same state as before the move was made. - +// Unmakes a move. When it returns, the position should +// be restored to exactly the same state as before the move was made. void Position::undo_move(Move m) { - assert(is_ok(m)); + assert(m.is_ok()); - sideToMove = ~sideToMove; + sideToMove = ~sideToMove; - Color us = sideToMove; - Square from = from_sq(m); - Square to = to_sq(m); - Piece pc = piece_on(to); + Color us = sideToMove; + Square from = m.from_sq(); + Square to = m.to_sq(); + Piece pc = piece_on(to); - assert(empty(from) || type_of(m) == CASTLING); - assert(type_of(st->capturedPiece) != KING); + assert(empty(from) || m.type_of() == CASTLING); + assert(type_of(st->capturedPiece) != KING); - if (type_of(m) == PROMOTION) - { - assert(relative_rank(us, to) == RANK_8); - assert(type_of(pc) == promotion_type(m)); - assert(type_of(pc) >= KNIGHT && type_of(pc) <= QUEEN); + if (m.type_of() == PROMOTION) + { + assert(relative_rank(us, to) == RANK_8); + assert(type_of(pc) == m.promotion_type()); + assert(type_of(pc) >= KNIGHT && type_of(pc) <= QUEEN); - remove_piece(to); - pc = make_piece(us, PAWN); - put_piece(pc, to); - } + remove_piece(to); + pc = make_piece(us, PAWN); + put_piece(pc, to); + } - if (type_of(m) == CASTLING) - { - Square rfrom, rto; - do_castling(us, from, to, rfrom, rto); - } - else - { - move_piece(to, from); // Put the piece back at the source square + if (m.type_of() == CASTLING) + { + Square rfrom, rto; + do_castling(us, from, to, rfrom, rto); + } + else + { + move_piece(to, from); // Put the piece back at the source square - if (st->capturedPiece) - { - Square capsq = to; + if (st->capturedPiece) + { + Square capsq = to; - if (type_of(m) == EN_PASSANT) - { - capsq -= pawn_push(us); + if (m.type_of() == EN_PASSANT) + { + capsq -= pawn_push(us); - assert(type_of(pc) == PAWN); - assert(to == st->previous->epSquare); - assert(relative_rank(us, to) == RANK_6); - assert(piece_on(capsq) == NO_PIECE); - assert(st->capturedPiece == make_piece(~us, PAWN)); - } + assert(type_of(pc) == PAWN); + assert(to == st->previous->epSquare); + assert(relative_rank(us, to) == RANK_6); + assert(piece_on(capsq) == NO_PIECE); + assert(st->capturedPiece == make_piece(~us, PAWN)); + } - put_piece(st->capturedPiece, capsq); // Restore the captured piece - } - } + put_piece(st->capturedPiece, capsq); // Restore the captured piece + } + } - // Finally point our state pointer back to the previous state - st = st->previous; - --gamePly; + // Finally point our state pointer back to the previous state + st = st->previous; + --gamePly; - assert(pos_is_ok()); + assert(pos_is_ok()); } -/// Position::do_castling() is a helper used to do/undo a castling move. This -/// is a bit tricky in Chess960 where from/to squares can overlap. +// Helper used to do/undo a castling move. This is a bit +// tricky in Chess960 where from/to squares can overlap. template void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto) { - bool kingSide = to > from; - rfrom = to; // Castling is encoded as "king captures friendly rook" - rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); - to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); + bool kingSide = to > from; + rfrom = to; // Castling is encoded as "king captures friendly rook" + rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); + to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); - if (Do && Eval::useNNUE) - { - auto& dp = st->dirtyPiece; - dp.piece[0] = make_piece(us, KING); - dp.from[0] = from; - dp.to[0] = to; - dp.piece[1] = make_piece(us, ROOK); - dp.from[1] = rfrom; - dp.to[1] = rto; - dp.dirty_num = 2; - } + if (Do) + { + auto& dp = st->dirtyPiece; + dp.piece[0] = make_piece(us, KING); + dp.from[0] = from; + dp.to[0] = to; + dp.piece[1] = make_piece(us, ROOK); + dp.from[1] = rfrom; + dp.to[1] = rto; + dp.dirty_num = 2; + } - // Remove both pieces first since squares could overlap in Chess960 - remove_piece(Do ? from : to); - remove_piece(Do ? rfrom : rto); - board[Do ? from : to] = board[Do ? rfrom : rto] = NO_PIECE; // Since remove_piece doesn't do this for us - put_piece(make_piece(us, KING), Do ? to : from); - put_piece(make_piece(us, ROOK), Do ? rto : rfrom); + // Remove both pieces first since squares could overlap in Chess960 + remove_piece(Do ? from : to); + remove_piece(Do ? rfrom : rto); + board[Do ? from : to] = board[Do ? rfrom : rto] = + NO_PIECE; // remove_piece does not do this for us + put_piece(make_piece(us, KING), Do ? to : from); + put_piece(make_piece(us, ROOK), Do ? rto : rfrom); } -/// Position::do_null_move() is used to do a "null move": it flips -/// the side to move without executing any move on the board. +// Used to do a "null move": it flips +// the side to move without executing any move on the board. +void Position::do_null_move(StateInfo& newSt, TranspositionTable& tt) { -void Position::do_null_move(StateInfo& newSt) { + assert(!checkers()); + assert(&newSt != st); - assert(!checkers()); - assert(&newSt != st); + std::memcpy(&newSt, st, offsetof(StateInfo, accumulatorBig)); - std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); + newSt.previous = st; + st = &newSt; - newSt.previous = st; - st = &newSt; + st->dirtyPiece.dirty_num = 0; + st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator() + st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = + st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false; - st->dirtyPiece.dirty_num = 0; - st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator() - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; + if (st->epSquare != SQ_NONE) + { + st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; + st->epSquare = SQ_NONE; + } - if (st->epSquare != SQ_NONE) - { - st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; - st->epSquare = SQ_NONE; - } + st->key ^= Zobrist::side; + ++st->rule50; + prefetch(tt.first_entry(key())); - st->key ^= Zobrist::side; - ++st->rule50; - prefetch(TT.first_entry(key())); + st->pliesFromNull = 0; - st->pliesFromNull = 0; + sideToMove = ~sideToMove; - sideToMove = ~sideToMove; + set_check_info(); - set_check_info(st); + st->repetition = 0; - st->repetition = 0; - - assert(pos_is_ok()); + assert(pos_is_ok()); } -/// Position::undo_null_move() must be used to undo a "null move" - +// Must be used to undo a "null move" void Position::undo_null_move() { - assert(!checkers()); + assert(!checkers()); - st = st->previous; - sideToMove = ~sideToMove; + st = st->previous; + sideToMove = ~sideToMove; } -/// Position::key_after() computes the new hash key after the given move. Needed -/// for speculative prefetch. It doesn't recognize special moves like castling, -/// en passant and promotions. - +// Computes the new hash key after the given move. Needed +// for speculative prefetch. It doesn't recognize special moves like castling, +// en passant and promotions. Key Position::key_after(Move m) const { - Square from = from_sq(m); - Square to = to_sq(m); - Piece pc = piece_on(from); - Piece captured = piece_on(to); - Key k = st->key ^ Zobrist::side; + Square from = m.from_sq(); + Square to = m.to_sq(); + Piece pc = piece_on(from); + Piece captured = piece_on(to); + Key k = st->key ^ Zobrist::side; - if (captured) - k ^= Zobrist::psq[captured][to]; + if (captured) + k ^= Zobrist::psq[captured][to]; - return k ^ Zobrist::psq[pc][to] ^ Zobrist::psq[pc][from]; + k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[pc][from]; + + return (captured || type_of(pc) == PAWN) ? k : adjust_key50(k); } -/// Position::see_ge (Static Exchange Evaluation Greater or Equal) tests if the -/// SEE value of move is greater or equal to the given threshold. We'll use an -/// algorithm similar to alpha-beta pruning with a null window. +// Tests if the SEE (Static Exchange Evaluation) +// value of move is greater or equal to the given threshold. We'll use an +// algorithm similar to alpha-beta pruning with a null window. +bool Position::see_ge(Move m, int threshold) const { -bool Position::see_ge(Move m, Value threshold) const { + assert(m.is_ok()); - assert(is_ok(m)); + // Only deal with normal moves, assume others pass a simple SEE + if (m.type_of() != NORMAL) + return VALUE_ZERO >= threshold; - // Only deal with normal moves, assume others pass a simple SEE - if (type_of(m) != NORMAL) - return VALUE_ZERO >= threshold; + Square from = m.from_sq(), to = m.to_sq(); - Square from = from_sq(m), to = to_sq(m); + int swap = PieceValue[piece_on(to)] - threshold; + if (swap < 0) + return false; - int swap = PieceValue[MG][piece_on(to)] - threshold; - if (swap < 0) - return false; + swap = PieceValue[piece_on(from)] - swap; + if (swap <= 0) + return true; - swap = PieceValue[MG][piece_on(from)] - swap; - if (swap <= 0) - return true; + assert(color_of(piece_on(from)) == sideToMove); + Bitboard occupied = pieces() ^ from ^ to; // xoring to is important for pinned piece logic + Color stm = sideToMove; + Bitboard attackers = attackers_to(to, occupied); + Bitboard stmAttackers, bb; + int res = 1; - assert(color_of(piece_on(from)) == sideToMove); - Bitboard occupied = pieces() ^ from ^ to; - Color stm = sideToMove; - Bitboard attackers = attackers_to(to, occupied); - Bitboard stmAttackers, bb; - int res = 1; + while (true) + { + stm = ~stm; + attackers &= occupied; - while (true) - { - stm = ~stm; - attackers &= occupied; + // If stm has no more attackers then give up: stm loses + if (!(stmAttackers = attackers & pieces(stm))) + break; - // If stm has no more attackers then give up: stm loses - if (!(stmAttackers = attackers & pieces(stm))) - break; + // Don't allow pinned pieces to attack as long as there are + // pinners on their original square. + if (pinners(~stm) & occupied) + { + stmAttackers &= ~blockers_for_king(stm); - // Don't allow pinned pieces to attack as long as there are - // pinners on their original square. - if (pinners(~stm) & occupied) - stmAttackers &= ~blockers_for_king(stm); + if (!stmAttackers) + break; + } - if (!stmAttackers) - break; + res ^= 1; - res ^= 1; + // Locate and remove the next least valuable attacker, and add to + // the bitboard 'attackers' any X-ray attackers behind it. + if ((bb = stmAttackers & pieces(PAWN))) + { + if ((swap = PawnValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); - // Locate and remove the next least valuable attacker, and add to - // the bitboard 'attackers' any X-ray attackers behind it. - if ((bb = stmAttackers & pieces(PAWN))) - { - if ((swap = PawnValueMg - swap) < res) - break; + attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); + } - occupied ^= least_significant_square_bb(bb); - attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); - } + else if ((bb = stmAttackers & pieces(KNIGHT))) + { + if ((swap = KnightValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); + } - else if ((bb = stmAttackers & pieces(KNIGHT))) - { - if ((swap = KnightValueMg - swap) < res) - break; + else if ((bb = stmAttackers & pieces(BISHOP))) + { + if ((swap = BishopValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); - occupied ^= least_significant_square_bb(bb); - } + attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); + } - else if ((bb = stmAttackers & pieces(BISHOP))) - { - if ((swap = BishopValueMg - swap) < res) - break; + else if ((bb = stmAttackers & pieces(ROOK))) + { + if ((swap = RookValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); - occupied ^= least_significant_square_bb(bb); - attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); - } + attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN); + } - else if ((bb = stmAttackers & pieces(ROOK))) - { - if ((swap = RookValueMg - swap) < res) - break; + else if ((bb = stmAttackers & pieces(QUEEN))) + { + if ((swap = QueenValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); - occupied ^= least_significant_square_bb(bb); - attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN); - } + attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN)) + | (attacks_bb(to, occupied) & pieces(ROOK, QUEEN)); + } - else if ((bb = stmAttackers & pieces(QUEEN))) - { - if ((swap = QueenValueMg - swap) < res) - break; + else // KING + // If we "capture" with the king but the opponent still has attackers, + // reverse the result. + return (attackers & ~pieces(stm)) ? res ^ 1 : res; + } - occupied ^= least_significant_square_bb(bb); - attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN)) - | (attacks_bb(to, occupied) & pieces(ROOK , QUEEN)); - } - - else // KING - // If we "capture" with the king but opponent still has attackers, - // reverse the result. - return (attackers & ~pieces(stm)) ? res ^ 1 : res; - } - - return bool(res); + return bool(res); } - -/// Position::is_draw() tests whether the position is drawn by 50-move rule -/// or by repetition. It does not detect stalemates. - +// Tests whether the position is drawn by 50-move rule +// or by repetition. It does not detect stalemates. bool Position::is_draw(int ply) const { - if (st->rule50 > 99 && (!checkers() || MoveList(*this).size())) - return true; + if (st->rule50 > 99 && (!checkers() || MoveList(*this).size())) + return true; - // Return a draw score if a position repeats once earlier but strictly - // after the root, or repeats twice before or at the root. - return st->repetition && st->repetition < ply; + // Return a draw score if a position repeats once earlier but strictly + // after the root, or repeats twice before or at the root. + return st->repetition && st->repetition < ply; } -// Position::has_repeated() tests whether there has been at least one repetition +// Tests whether there has been at least one repetition // of positions since the last capture or pawn move. - bool Position::has_repeated() const { StateInfo* stc = st; - int end = std::min(st->rule50, st->pliesFromNull); + int end = std::min(st->rule50, st->pliesFromNull); while (end-- >= 4) { if (stc->repetition) @@ -1195,154 +1156,137 @@ bool Position::has_repeated() const { } -/// Position::has_game_cycle() tests if the position has a move which draws by repetition, -/// or an earlier position has a move that directly reaches the current position. - +// Tests if the position has a move which draws by repetition, +// or an earlier position has a move that directly reaches the current position. bool Position::has_game_cycle(int ply) const { - int j; + int j; - int end = std::min(st->rule50, st->pliesFromNull); + int end = std::min(st->rule50, st->pliesFromNull); - if (end < 3) + if (end < 3) + return false; + + Key originalKey = st->key; + StateInfo* stp = st->previous; + + for (int i = 3; i <= end; i += 2) + { + stp = stp->previous->previous; + + Key moveKey = originalKey ^ stp->key; + if ((j = H1(moveKey), cuckoo[j] == moveKey) || (j = H2(moveKey), cuckoo[j] == moveKey)) + { + Move move = cuckooMove[j]; + Square s1 = move.from_sq(); + Square s2 = move.to_sq(); + + if (!((between_bb(s1, s2) ^ s2) & pieces())) + { + if (ply > i) + return true; + + // For nodes before or at the root, check that the move is a + // repetition rather than a move to the current position. + // In the cuckoo table, both moves Rc1c5 and Rc5c1 are stored in + // the same location, so we have to select which square to check. + if (color_of(piece_on(empty(s1) ? s2 : s1)) != side_to_move()) + continue; + + // For repetitions before or at the root, require one more + if (stp->repetition) + return true; + } + } + } return false; - - Key originalKey = st->key; - StateInfo* stp = st->previous; - - for (int i = 3; i <= end; i += 2) - { - stp = stp->previous->previous; - - Key moveKey = originalKey ^ stp->key; - if ( (j = H1(moveKey), cuckoo[j] == moveKey) - || (j = H2(moveKey), cuckoo[j] == moveKey)) - { - Move move = cuckooMove[j]; - Square s1 = from_sq(move); - Square s2 = to_sq(move); - - if (!((between_bb(s1, s2) ^ s2) & pieces())) - { - if (ply > i) - return true; - - // For nodes before or at the root, check that the move is a - // repetition rather than a move to the current position. - // In the cuckoo table, both moves Rc1c5 and Rc5c1 are stored in - // the same location, so we have to select which square to check. - if (color_of(piece_on(empty(s1) ? s2 : s1)) != side_to_move()) - continue; - - // For repetitions before or at the root, require one more - if (stp->repetition) - return true; - } - } - } - return false; } -/// Position::flip() flips position with the white and black sides reversed. This -/// is only useful for debugging e.g. for finding evaluation symmetry bugs. - +// Flips position with the white and black sides reversed. This +// is only useful for debugging e.g. for finding evaluation symmetry bugs. void Position::flip() { - string f, token; - std::stringstream ss(fen()); + string f, token; + std::stringstream ss(fen()); - for (Rank r = RANK_8; r >= RANK_1; --r) // Piece placement - { - std::getline(ss, token, r > RANK_1 ? '/' : ' '); - f.insert(0, token + (f.empty() ? " " : "/")); - } + for (Rank r = RANK_8; r >= RANK_1; --r) // Piece placement + { + std::getline(ss, token, r > RANK_1 ? '/' : ' '); + f.insert(0, token + (f.empty() ? " " : "/")); + } - ss >> token; // Active color - f += (token == "w" ? "B " : "W "); // Will be lowercased later + ss >> token; // Active color + f += (token == "w" ? "B " : "W "); // Will be lowercased later - ss >> token; // Castling availability - f += token + " "; + ss >> token; // Castling availability + f += token + " "; - std::transform(f.begin(), f.end(), f.begin(), - [](char c) { return char(islower(c) ? toupper(c) : tolower(c)); }); + std::transform(f.begin(), f.end(), f.begin(), + [](char c) { return char(islower(c) ? toupper(c) : tolower(c)); }); - ss >> token; // En passant square - f += (token == "-" ? token : token.replace(1, 1, token[1] == '3' ? "6" : "3")); + ss >> token; // En passant square + f += (token == "-" ? token : token.replace(1, 1, token[1] == '3' ? "6" : "3")); - std::getline(ss, token); // Half and full moves - f += token; + std::getline(ss, token); // Half and full moves + f += token; - set(f, is_chess960(), st, this_thread()); + set(f, is_chess960(), st); - assert(pos_is_ok()); + assert(pos_is_ok()); } -/// Position::pos_is_ok() performs some consistency checks for the -/// position object and raises an asserts if something wrong is detected. -/// This is meant to be helpful when debugging. - +// Performs some consistency checks for the position object +// and raise an assert if something wrong is detected. +// This is meant to be helpful when debugging. bool Position::pos_is_ok() const { - constexpr bool Fast = true; // Quick (default) or full check? + constexpr bool Fast = true; // Quick (default) or full check? - if ( (sideToMove != WHITE && sideToMove != BLACK) - || piece_on(square(WHITE)) != W_KING - || piece_on(square(BLACK)) != B_KING - || ( ep_square() != SQ_NONE - && relative_rank(sideToMove, ep_square()) != RANK_6)) - assert(0 && "pos_is_ok: Default"); + if ((sideToMove != WHITE && sideToMove != BLACK) || piece_on(square(WHITE)) != W_KING + || piece_on(square(BLACK)) != B_KING + || (ep_square() != SQ_NONE && relative_rank(sideToMove, ep_square()) != RANK_6)) + assert(0 && "pos_is_ok: Default"); - if (Fast) - return true; + if (Fast) + return true; - if ( pieceCount[W_KING] != 1 - || pieceCount[B_KING] != 1 - || attackers_to(square(~sideToMove)) & pieces(sideToMove)) - assert(0 && "pos_is_ok: Kings"); + if (pieceCount[W_KING] != 1 || pieceCount[B_KING] != 1 + || attackers_to(square(~sideToMove)) & pieces(sideToMove)) + assert(0 && "pos_is_ok: Kings"); - if ( (pieces(PAWN) & (Rank1BB | Rank8BB)) - || pieceCount[W_PAWN] > 8 - || pieceCount[B_PAWN] > 8) - assert(0 && "pos_is_ok: Pawns"); + if ((pieces(PAWN) & (Rank1BB | Rank8BB)) || pieceCount[W_PAWN] > 8 || pieceCount[B_PAWN] > 8) + assert(0 && "pos_is_ok: Pawns"); - if ( (pieces(WHITE) & pieces(BLACK)) - || (pieces(WHITE) | pieces(BLACK)) != pieces() - || popcount(pieces(WHITE)) > 16 - || popcount(pieces(BLACK)) > 16) - assert(0 && "pos_is_ok: Bitboards"); + if ((pieces(WHITE) & pieces(BLACK)) || (pieces(WHITE) | pieces(BLACK)) != pieces() + || popcount(pieces(WHITE)) > 16 || popcount(pieces(BLACK)) > 16) + assert(0 && "pos_is_ok: Bitboards"); - for (PieceType p1 = PAWN; p1 <= KING; ++p1) - for (PieceType p2 = PAWN; p2 <= KING; ++p2) - if (p1 != p2 && (pieces(p1) & pieces(p2))) - assert(0 && "pos_is_ok: Bitboards"); + for (PieceType p1 = PAWN; p1 <= KING; ++p1) + for (PieceType p2 = PAWN; p2 <= KING; ++p2) + if (p1 != p2 && (pieces(p1) & pieces(p2))) + assert(0 && "pos_is_ok: Bitboards"); - StateInfo si = *st; - ASSERT_ALIGNED(&si, Eval::NNUE::CacheLineSize); - set_state(&si); - if (std::memcmp(&si, st, sizeof(StateInfo))) - assert(0 && "pos_is_ok: State"); + for (Piece pc : Pieces) + if (pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc))) + || pieceCount[pc] != std::count(board, board + SQUARE_NB, pc)) + assert(0 && "pos_is_ok: Pieces"); - for (Piece pc : Pieces) - if ( pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc))) - || pieceCount[pc] != std::count(board, board + SQUARE_NB, pc)) - assert(0 && "pos_is_ok: Pieces"); + for (Color c : {WHITE, BLACK}) + for (CastlingRights cr : {c & KING_SIDE, c & QUEEN_SIDE}) + { + if (!can_castle(cr)) + continue; - for (Color c : { WHITE, BLACK }) - for (CastlingRights cr : {c & KING_SIDE, c & QUEEN_SIDE}) - { - if (!can_castle(cr)) - continue; + if (piece_on(castlingRookSquare[cr]) != make_piece(c, ROOK) + || castlingRightsMask[castlingRookSquare[cr]] != cr + || (castlingRightsMask[square(c)] & cr) != cr) + assert(0 && "pos_is_ok: Castling"); + } - if ( piece_on(castlingRookSquare[cr]) != make_piece(c, ROOK) - || castlingRightsMask[castlingRookSquare[cr]] != cr - || (castlingRightsMask[square(c)] & cr) != cr) - assert(0 && "pos_is_ok: Castling"); - } - - return true; + return true; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/position.h b/src/position.h index 8dbf1493..154ed652 100644 --- a/src/position.h +++ b/src/position.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,399 +21,344 @@ #include #include -#include // For std::unique_ptr +#include +#include #include #include "bitboard.h" -#include "evaluate.h" -#include "psqt.h" -#include "types.h" - #include "nnue/nnue_accumulator.h" +#include "nnue/nnue_architecture.h" +#include "types.h" namespace Stockfish { -/// StateInfo struct stores information needed to restore a Position object to -/// its previous state when we retract a move. Whenever a move is made on the -/// board (by calling Position::do_move), a StateInfo object must be passed. +class TranspositionTable; + +// StateInfo struct stores information needed to restore a Position object to +// its previous state when we retract a move. Whenever a move is made on the +// board (by calling Position::do_move), a StateInfo object must be passed. struct StateInfo { - // Copied when making a move - Key pawnKey; - Key materialKey; - Value nonPawnMaterial[COLOR_NB]; - int castlingRights; - int rule50; - int pliesFromNull; - Square epSquare; + // Copied when making a move + Key materialKey; + Key pawnKey; + Value nonPawnMaterial[COLOR_NB]; + int castlingRights; + int rule50; + int pliesFromNull; + Square epSquare; - // Not copied when making a move (will be recomputed anyhow) - Key key; - Bitboard checkersBB; - StateInfo* previous; - Bitboard blockersForKing[COLOR_NB]; - Bitboard pinners[COLOR_NB]; - Bitboard checkSquares[PIECE_TYPE_NB]; - Piece capturedPiece; - int repetition; + // Not copied when making a move (will be recomputed anyhow) + Key key; + Bitboard checkersBB; + StateInfo* previous; + Bitboard blockersForKing[COLOR_NB]; + Bitboard pinners[COLOR_NB]; + Bitboard checkSquares[PIECE_TYPE_NB]; + Piece capturedPiece; + int repetition; - // Used by NNUE - Eval::NNUE::Accumulator accumulator; - DirtyPiece dirtyPiece; + // Used by NNUE + Eval::NNUE::Accumulator accumulatorBig; + Eval::NNUE::Accumulator accumulatorSmall; + DirtyPiece dirtyPiece; }; -/// A list to keep track of the position states along the setup moves (from the -/// start position to the position just before the search starts). Needed by -/// 'draw by repetition' detection. Use a std::deque because pointers to -/// elements are not invalidated upon list resizing. -typedef std::unique_ptr> StateListPtr; +// A list to keep track of the position states along the setup moves (from the +// start position to the position just before the search starts). Needed by +// 'draw by repetition' detection. Use a std::deque because pointers to +// elements are not invalidated upon list resizing. +using StateListPtr = std::unique_ptr>; -/// Position class stores information regarding the board representation as -/// pieces, side to move, hash keys, castling info, etc. Important methods are -/// do_move() and undo_move(), used by the search to update node info when -/// traversing the search tree. -class Thread; - +// Position class stores information regarding the board representation as +// pieces, side to move, hash keys, castling info, etc. Important methods are +// do_move() and undo_move(), used by the search to update node info when +// traversing the search tree. class Position { -public: - static void init(); + public: + static void init(); - Position() = default; - Position(const Position&) = delete; - Position& operator=(const Position&) = delete; + Position() = default; + Position(const Position&) = delete; + Position& operator=(const Position&) = delete; - // FEN string input/output - Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th); - Position& set(const std::string& code, Color c, StateInfo* si); - std::string fen() const; + // FEN string input/output + Position& set(const std::string& fenStr, bool isChess960, StateInfo* si); + Position& set(const std::string& code, Color c, StateInfo* si); + std::string fen() const; - // Position representation - Bitboard pieces(PieceType pt) const; - Bitboard pieces(PieceType pt1, PieceType pt2) const; - Bitboard pieces(Color c) const; - Bitboard pieces(Color c, PieceType pt) const; - Bitboard pieces(Color c, PieceType pt1, PieceType pt2) const; - Piece piece_on(Square s) const; - Square ep_square() const; - bool empty(Square s) const; - template int count(Color c) const; - template int count() const; - template Square square(Color c) const; - bool is_on_semiopen_file(Color c, Square s) const; + // Position representation + Bitboard pieces(PieceType pt = ALL_PIECES) const; + template + Bitboard pieces(PieceType pt, PieceTypes... pts) const; + Bitboard pieces(Color c) const; + template + Bitboard pieces(Color c, PieceTypes... pts) const; + Piece piece_on(Square s) const; + Square ep_square() const; + bool empty(Square s) const; + template + int count(Color c) const; + template + int count() const; + template + Square square(Color c) const; - // Castling - CastlingRights castling_rights(Color c) const; - bool can_castle(CastlingRights cr) const; - bool castling_impeded(CastlingRights cr) const; - Square castling_rook_square(CastlingRights cr) const; + // Castling + CastlingRights castling_rights(Color c) const; + bool can_castle(CastlingRights cr) const; + bool castling_impeded(CastlingRights cr) const; + Square castling_rook_square(CastlingRights cr) const; - // Checking - Bitboard checkers() const; - Bitboard blockers_for_king(Color c) const; - Bitboard check_squares(PieceType pt) const; - Bitboard pinners(Color c) const; + // Checking + Bitboard checkers() const; + Bitboard blockers_for_king(Color c) const; + Bitboard check_squares(PieceType pt) const; + Bitboard pinners(Color c) const; - // Attacks to/from a given square - Bitboard attackers_to(Square s) const; - Bitboard attackers_to(Square s, Bitboard occupied) const; - Bitboard slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const; + // Attacks to/from a given square + Bitboard attackers_to(Square s) const; + Bitboard attackers_to(Square s, Bitboard occupied) const; + void update_slider_blockers(Color c) const; + template + Bitboard attacks_by(Color c) const; - // Properties of moves - bool legal(Move m) const; - bool pseudo_legal(const Move m) const; - bool capture(Move m) const; - bool capture_or_promotion(Move m) const; - bool gives_check(Move m) const; - Piece moved_piece(Move m) const; - Piece captured_piece() const; + // Properties of moves + bool legal(Move m) const; + bool pseudo_legal(const Move m) const; + bool capture(Move m) const; + bool capture_stage(Move m) const; + bool gives_check(Move m) const; + Piece moved_piece(Move m) const; + Piece captured_piece() const; - // Piece specific - bool pawn_passed(Color c, Square s) const; - bool opposite_bishops() const; - int pawns_on_same_color_squares(Color c, Square s) const; + // Doing and undoing moves + void do_move(Move m, StateInfo& newSt); + void do_move(Move m, StateInfo& newSt, bool givesCheck); + void undo_move(Move m); + void do_null_move(StateInfo& newSt, TranspositionTable& tt); + void undo_null_move(); - // Doing and undoing moves - void do_move(Move m, StateInfo& newSt); - void do_move(Move m, StateInfo& newSt, bool givesCheck); - void undo_move(Move m); - void do_null_move(StateInfo& newSt); - void undo_null_move(); + // Static Exchange Evaluation + bool see_ge(Move m, int threshold = 0) const; - // Static Exchange Evaluation - bool see_ge(Move m, Value threshold = VALUE_ZERO) const; + // Accessing hash keys + Key key() const; + Key key_after(Move m) const; + Key material_key() const; + Key pawn_key() const; - // Accessing hash keys - Key key() const; - Key key_after(Move m) const; - Key material_key() const; - Key pawn_key() const; + // Other properties of the position + Color side_to_move() const; + int game_ply() const; + bool is_chess960() const; + bool is_draw(int ply) const; + bool has_game_cycle(int ply) const; + bool has_repeated() const; + int rule50_count() const; + Value non_pawn_material(Color c) const; + Value non_pawn_material() const; - // Other properties of the position - Color side_to_move() const; - int game_ply() const; - bool is_chess960() const; - Thread* this_thread() const; - bool is_draw(int ply) const; - bool has_game_cycle(int ply) const; - bool has_repeated() const; - int rule50_count() const; - Score psq_score() const; - Value non_pawn_material(Color c) const; - Value non_pawn_material() const; + // Position consistency check, for debugging + bool pos_is_ok() const; + void flip(); - // Position consistency check, for debugging - bool pos_is_ok() const; - void flip(); + // Used by NNUE + StateInfo* state() const; - // Used by NNUE - StateInfo* state() const; + void put_piece(Piece pc, Square s); + void remove_piece(Square s); - void put_piece(Piece pc, Square s); - void remove_piece(Square s); + private: + // Initialization helpers (used while setting up a position) + void set_castling_right(Color c, Square rfrom); + void set_state() const; + void set_check_info() const; -private: - // Initialization helpers (used while setting up a position) - void set_castling_right(Color c, Square rfrom); - void set_state(StateInfo* si) const; - void set_check_info(StateInfo* si) const; + // Other helpers + void move_piece(Square from, Square to); + template + void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); + template + Key adjust_key50(Key k) const; - // Other helpers - void move_piece(Square from, Square to); - template - void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); - - // Data members - Piece board[SQUARE_NB]; - Bitboard byTypeBB[PIECE_TYPE_NB]; - Bitboard byColorBB[COLOR_NB]; - int pieceCount[PIECE_NB]; - int castlingRightsMask[SQUARE_NB]; - Square castlingRookSquare[CASTLING_RIGHT_NB]; - Bitboard castlingPath[CASTLING_RIGHT_NB]; - Thread* thisThread; - StateInfo* st; - int gamePly; - Color sideToMove; - Score psq; - bool chess960; + // Data members + Piece board[SQUARE_NB]; + Bitboard byTypeBB[PIECE_TYPE_NB]; + Bitboard byColorBB[COLOR_NB]; + int pieceCount[PIECE_NB]; + int castlingRightsMask[SQUARE_NB]; + Square castlingRookSquare[CASTLING_RIGHT_NB]; + Bitboard castlingPath[CASTLING_RIGHT_NB]; + StateInfo* st; + int gamePly; + Color sideToMove; + bool chess960; }; -extern std::ostream& operator<<(std::ostream& os, const Position& pos); +std::ostream& operator<<(std::ostream& os, const Position& pos); -inline Color Position::side_to_move() const { - return sideToMove; -} +inline Color Position::side_to_move() const { return sideToMove; } inline Piece Position::piece_on(Square s) const { - assert(is_ok(s)); - return board[s]; + assert(is_ok(s)); + return board[s]; } -inline bool Position::empty(Square s) const { - return piece_on(s) == NO_PIECE; +inline bool Position::empty(Square s) const { return piece_on(s) == NO_PIECE; } + +inline Piece Position::moved_piece(Move m) const { return piece_on(m.from_sq()); } + +inline Bitboard Position::pieces(PieceType pt) const { return byTypeBB[pt]; } + +template +inline Bitboard Position::pieces(PieceType pt, PieceTypes... pts) const { + return pieces(pt) | pieces(pts...); } -inline Piece Position::moved_piece(Move m) const { - return piece_on(from_sq(m)); +inline Bitboard Position::pieces(Color c) const { return byColorBB[c]; } + +template +inline Bitboard Position::pieces(Color c, PieceTypes... pts) const { + return pieces(c) & pieces(pts...); } -inline Bitboard Position::pieces(PieceType pt = ALL_PIECES) const { - return byTypeBB[pt]; +template +inline int Position::count(Color c) const { + return pieceCount[make_piece(c, Pt)]; } -inline Bitboard Position::pieces(PieceType pt1, PieceType pt2) const { - return pieces(pt1) | pieces(pt2); +template +inline int Position::count() const { + return count(WHITE) + count(BLACK); } -inline Bitboard Position::pieces(Color c) const { - return byColorBB[c]; +template +inline Square Position::square(Color c) const { + assert(count(c) == 1); + return lsb(pieces(c, Pt)); } -inline Bitboard Position::pieces(Color c, PieceType pt) const { - return pieces(c) & pieces(pt); -} +inline Square Position::ep_square() const { return st->epSquare; } -inline Bitboard Position::pieces(Color c, PieceType pt1, PieceType pt2) const { - return pieces(c) & (pieces(pt1) | pieces(pt2)); -} - -template inline int Position::count(Color c) const { - return pieceCount[make_piece(c, Pt)]; -} - -template inline int Position::count() const { - return count(WHITE) + count(BLACK); -} - -template inline Square Position::square(Color c) const { - assert(count(c) == 1); - return lsb(pieces(c, Pt)); -} - -inline Square Position::ep_square() const { - return st->epSquare; -} - -inline bool Position::is_on_semiopen_file(Color c, Square s) const { - return !(pieces(c, PAWN) & file_bb(s)); -} - -inline bool Position::can_castle(CastlingRights cr) const { - return st->castlingRights & cr; -} +inline bool Position::can_castle(CastlingRights cr) const { return st->castlingRights & cr; } inline CastlingRights Position::castling_rights(Color c) const { - return c & CastlingRights(st->castlingRights); + return c & CastlingRights(st->castlingRights); } inline bool Position::castling_impeded(CastlingRights cr) const { - assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); - - return pieces() & castlingPath[cr]; + assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); + return pieces() & castlingPath[cr]; } inline Square Position::castling_rook_square(CastlingRights cr) const { - assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); - - return castlingRookSquare[cr]; + assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); + return castlingRookSquare[cr]; } -inline Bitboard Position::attackers_to(Square s) const { - return attackers_to(s, pieces()); +inline Bitboard Position::attackers_to(Square s) const { return attackers_to(s, pieces()); } + +template +inline Bitboard Position::attacks_by(Color c) const { + + if constexpr (Pt == PAWN) + return c == WHITE ? pawn_attacks_bb(pieces(WHITE, PAWN)) + : pawn_attacks_bb(pieces(BLACK, PAWN)); + else + { + Bitboard threats = 0; + Bitboard attackers = pieces(c, Pt); + while (attackers) + threats |= attacks_bb(pop_lsb(attackers), pieces()); + return threats; + } } -inline Bitboard Position::checkers() const { - return st->checkersBB; +inline Bitboard Position::checkers() const { return st->checkersBB; } + +inline Bitboard Position::blockers_for_king(Color c) const { return st->blockersForKing[c]; } + +inline Bitboard Position::pinners(Color c) const { return st->pinners[c]; } + +inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; } + +inline Key Position::key() const { return adjust_key50(st->key); } + +template +inline Key Position::adjust_key50(Key k) const { + return st->rule50 < 14 - AfterMove ? k : k ^ make_key((st->rule50 - (14 - AfterMove)) / 8); } -inline Bitboard Position::blockers_for_king(Color c) const { - return st->blockersForKing[c]; -} +inline Key Position::pawn_key() const { return st->pawnKey; } -inline Bitboard Position::pinners(Color c) const { - return st->pinners[c]; -} +inline Key Position::material_key() const { return st->materialKey; } -inline Bitboard Position::check_squares(PieceType pt) const { - return st->checkSquares[pt]; -} - -inline bool Position::pawn_passed(Color c, Square s) const { - return !(pieces(~c, PAWN) & passed_pawn_span(c, s)); -} - -inline int Position::pawns_on_same_color_squares(Color c, Square s) const { - return popcount(pieces(c, PAWN) & ((DarkSquares & s) ? DarkSquares : ~DarkSquares)); -} - -inline Key Position::key() const { - return st->rule50 < 14 ? st->key - : st->key ^ make_key((st->rule50 - 14) / 8); -} - -inline Key Position::pawn_key() const { - return st->pawnKey; -} - -inline Key Position::material_key() const { - return st->materialKey; -} - -inline Score Position::psq_score() const { - return psq; -} - -inline Value Position::non_pawn_material(Color c) const { - return st->nonPawnMaterial[c]; -} +inline Value Position::non_pawn_material(Color c) const { return st->nonPawnMaterial[c]; } inline Value Position::non_pawn_material() const { - return non_pawn_material(WHITE) + non_pawn_material(BLACK); + return non_pawn_material(WHITE) + non_pawn_material(BLACK); } -inline int Position::game_ply() const { - return gamePly; -} +inline int Position::game_ply() const { return gamePly; } -inline int Position::rule50_count() const { - return st->rule50; -} +inline int Position::rule50_count() const { return st->rule50; } -inline bool Position::opposite_bishops() const { - return count(WHITE) == 1 - && count(BLACK) == 1 - && opposite_colors(square(WHITE), square(BLACK)); -} - -inline bool Position::is_chess960() const { - return chess960; -} - -inline bool Position::capture_or_promotion(Move m) const { - assert(is_ok(m)); - return type_of(m) != NORMAL ? type_of(m) != CASTLING : !empty(to_sq(m)); -} +inline bool Position::is_chess960() const { return chess960; } inline bool Position::capture(Move m) const { - assert(is_ok(m)); - // Castling is encoded as "king captures rook" - return (!empty(to_sq(m)) && type_of(m) != CASTLING) || type_of(m) == EN_PASSANT; + assert(m.is_ok()); + return (!empty(m.to_sq()) && m.type_of() != CASTLING) || m.type_of() == EN_PASSANT; } -inline Piece Position::captured_piece() const { - return st->capturedPiece; +// Returns true if a move is generated from the capture stage, having also +// queen promotions covered, i.e. consistency with the capture stage move generation +// is needed to avoid the generation of duplicate moves. +inline bool Position::capture_stage(Move m) const { + assert(m.is_ok()); + return capture(m) || m.promotion_type() == QUEEN; } -inline Thread* Position::this_thread() const { - return thisThread; -} +inline Piece Position::captured_piece() const { return st->capturedPiece; } inline void Position::put_piece(Piece pc, Square s) { - board[s] = pc; - byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s; - byColorBB[color_of(pc)] |= s; - pieceCount[pc]++; - pieceCount[make_piece(color_of(pc), ALL_PIECES)]++; - psq += PSQT::psq[pc][s]; + board[s] = pc; + byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s; + byColorBB[color_of(pc)] |= s; + pieceCount[pc]++; + pieceCount[make_piece(color_of(pc), ALL_PIECES)]++; } inline void Position::remove_piece(Square s) { - Piece pc = board[s]; - byTypeBB[ALL_PIECES] ^= s; - byTypeBB[type_of(pc)] ^= s; - byColorBB[color_of(pc)] ^= s; - board[s] = NO_PIECE; - pieceCount[pc]--; - pieceCount[make_piece(color_of(pc), ALL_PIECES)]--; - psq -= PSQT::psq[pc][s]; + Piece pc = board[s]; + byTypeBB[ALL_PIECES] ^= s; + byTypeBB[type_of(pc)] ^= s; + byColorBB[color_of(pc)] ^= s; + board[s] = NO_PIECE; + pieceCount[pc]--; + pieceCount[make_piece(color_of(pc), ALL_PIECES)]--; } inline void Position::move_piece(Square from, Square to) { - Piece pc = board[from]; - Bitboard fromTo = from | to; - byTypeBB[ALL_PIECES] ^= fromTo; - byTypeBB[type_of(pc)] ^= fromTo; - byColorBB[color_of(pc)] ^= fromTo; - board[from] = NO_PIECE; - board[to] = pc; - psq += PSQT::psq[pc][to] - PSQT::psq[pc][from]; + Piece pc = board[from]; + Bitboard fromTo = from | to; + byTypeBB[ALL_PIECES] ^= fromTo; + byTypeBB[type_of(pc)] ^= fromTo; + byColorBB[color_of(pc)] ^= fromTo; + board[from] = NO_PIECE; + board[to] = pc; } -inline void Position::do_move(Move m, StateInfo& newSt) { - do_move(m, newSt, gives_check(m)); -} +inline void Position::do_move(Move m, StateInfo& newSt) { do_move(m, newSt, gives_check(m)); } -inline StateInfo* Position::state() const { +inline StateInfo* Position::state() const { return st; } - return st; -} +} // namespace Stockfish -} // namespace Stockfish - -#endif // #ifndef POSITION_H_INCLUDED +#endif // #ifndef POSITION_H_INCLUDED diff --git a/src/psqt.cpp b/src/psqt.cpp deleted file mode 100644 index ca5664c2..00000000 --- a/src/psqt.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - - -#include "psqt.h" - -#include - -#include "bitboard.h" -#include "types.h" - -namespace Stockfish { - -namespace -{ - -auto constexpr S = make_score; - -// 'Bonus' contains Piece-Square parameters. -// Scores are explicit for files A to D, implicitly mirrored for E to H. -constexpr Score Bonus[][RANK_NB][int(FILE_NB) / 2] = { - { }, - { }, - { // Knight - { S(-175, -96), S(-92,-65), S(-74,-49), S(-73,-21) }, - { S( -77, -67), S(-41,-54), S(-27,-18), S(-15, 8) }, - { S( -61, -40), S(-17,-27), S( 6, -8), S( 12, 29) }, - { S( -35, -35), S( 8, -2), S( 40, 13), S( 49, 28) }, - { S( -34, -45), S( 13,-16), S( 44, 9), S( 51, 39) }, - { S( -9, -51), S( 22,-44), S( 58,-16), S( 53, 17) }, - { S( -67, -69), S(-27,-50), S( 4,-51), S( 37, 12) }, - { S(-201,-100), S(-83,-88), S(-56,-56), S(-26,-17) } - }, - { // Bishop - { S(-37,-40), S(-4 ,-21), S( -6,-26), S(-16, -8) }, - { S(-11,-26), S( 6, -9), S( 13,-12), S( 3, 1) }, - { S(-5 ,-11), S( 15, -1), S( -4, -1), S( 12, 7) }, - { S(-4 ,-14), S( 8, -4), S( 18, 0), S( 27, 12) }, - { S(-8 ,-12), S( 20, -1), S( 15,-10), S( 22, 11) }, - { S(-11,-21), S( 4, 4), S( 1, 3), S( 8, 4) }, - { S(-12,-22), S(-10,-14), S( 4, -1), S( 0, 1) }, - { S(-34,-32), S( 1,-29), S(-10,-26), S(-16,-17) } - }, - { // Rook - { S(-31, -9), S(-20,-13), S(-14,-10), S(-5, -9) }, - { S(-21,-12), S(-13, -9), S( -8, -1), S( 6, -2) }, - { S(-25, 6), S(-11, -8), S( -1, -2), S( 3, -6) }, - { S(-13, -6), S( -5, 1), S( -4, -9), S(-6, 7) }, - { S(-27, -5), S(-15, 8), S( -4, 7), S( 3, -6) }, - { S(-22, 6), S( -2, 1), S( 6, -7), S(12, 10) }, - { S( -2, 4), S( 12, 5), S( 16, 20), S(18, -5) }, - { S(-17, 18), S(-19, 0), S( -1, 19), S( 9, 13) } - }, - { // Queen - { S( 3,-69), S(-5,-57), S(-5,-47), S( 4,-26) }, - { S(-3,-54), S( 5,-31), S( 8,-22), S(12, -4) }, - { S(-3,-39), S( 6,-18), S(13, -9), S( 7, 3) }, - { S( 4,-23), S( 5, -3), S( 9, 13), S( 8, 24) }, - { S( 0,-29), S(14, -6), S(12, 9), S( 5, 21) }, - { S(-4,-38), S(10,-18), S( 6,-11), S( 8, 1) }, - { S(-5,-50), S( 6,-27), S(10,-24), S( 8, -8) }, - { S(-2,-74), S(-2,-52), S( 1,-43), S(-2,-34) } - }, - { // King - { S(271, 1), S(327, 45), S(271, 85), S(198, 76) }, - { S(278, 53), S(303,100), S(234,133), S(179,135) }, - { S(195, 88), S(258,130), S(169,169), S(120,175) }, - { S(164,103), S(190,156), S(138,172), S( 98,172) }, - { S(154, 96), S(179,166), S(105,199), S( 70,199) }, - { S(123, 92), S(145,172), S( 81,184), S( 31,191) }, - { S( 88, 47), S(120,121), S( 65,116), S( 33,131) }, - { S( 59, 11), S( 89, 59), S( 45, 73), S( -1, 78) } - } -}; - -constexpr Score PBonus[RANK_NB][FILE_NB] = - { // Pawn (asymmetric distribution) - { }, - { S( 2, -8), S( 4, -6), S( 11, 9), S( 18, 5), S( 16, 16), S( 21, 6), S( 9, -6), S( -3,-18) }, - { S( -9, -9), S(-15, -7), S( 11,-10), S( 15, 5), S( 31, 2), S( 23, 3), S( 6, -8), S(-20, -5) }, - { S( -3, 7), S(-20, 1), S( 8, -8), S( 19, -2), S( 39,-14), S( 17,-13), S( 2,-11), S( -5, -6) }, - { S( 11, 12), S( -4, 6), S(-11, 2), S( 2, -6), S( 11, -5), S( 0, -4), S(-12, 14), S( 5, 9) }, - { S( 3, 27), S(-11, 18), S( -6, 19), S( 22, 29), S( -8, 30), S( -5, 9), S(-14, 8), S(-11, 14) }, - { S( -7, -1), S( 6,-14), S( -2, 13), S(-11, 22), S( 4, 24), S(-14, 17), S( 10, 7), S( -9, 7) } - }; - -} // namespace - - -namespace PSQT -{ - -Score psq[PIECE_NB][SQUARE_NB]; - -// PSQT::init() initializes piece-square tables: the white halves of the tables are -// copied from Bonus[] and PBonus[], adding the piece value, then the black halves of -// the tables are initialized by flipping and changing the sign of the white scores. -void init() { - - for (Piece pc : {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING}) - { - Score score = make_score(PieceValue[MG][pc], PieceValue[EG][pc]); - - for (Square s = SQ_A1; s <= SQ_H8; ++s) - { - File f = File(edge_distance(file_of(s))); - psq[ pc][s] = score + (type_of(pc) == PAWN ? PBonus[rank_of(s)][file_of(s)] - : Bonus[pc][rank_of(s)][f]); - psq[~pc][flip_rank(s)] = -psq[pc][s]; - } - } -} - -} // namespace PSQT - -} // namespace Stockfish diff --git a/src/score.cpp b/src/score.cpp new file mode 100644 index 00000000..292f5340 --- /dev/null +++ b/src/score.cpp @@ -0,0 +1,48 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "score.h" + +#include +#include +#include + +#include "uci.h" + +namespace Stockfish { + +Score::Score(Value v, const Position& pos) { + assert(-VALUE_INFINITE < v && v < VALUE_INFINITE); + + if (std::abs(v) < VALUE_TB_WIN_IN_MAX_PLY) + { + score = InternalUnits{UCIEngine::to_cp(v, pos)}; + } + else if (std::abs(v) <= VALUE_TB) + { + auto distance = VALUE_TB - std::abs(v); + score = (v > 0) ? Tablebase{distance, true} : Tablebase{-distance, false}; + } + else + { + auto distance = VALUE_MATE - std::abs(v); + score = (v > 0) ? Mate{distance} : Mate{-distance}; + } +} + +} \ No newline at end of file diff --git a/src/score.h b/src/score.h new file mode 100644 index 00000000..2eb40f7e --- /dev/null +++ b/src/score.h @@ -0,0 +1,70 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef SCORE_H_INCLUDED +#define SCORE_H_INCLUDED + +#include +#include + +#include "types.h" + +namespace Stockfish { + +class Position; + +class Score { + public: + struct Mate { + int plies; + }; + + struct Tablebase { + int plies; + bool win; + }; + + struct InternalUnits { + int value; + }; + + Score() = default; + Score(Value v, const Position& pos); + + template + bool is() const { + return std::holds_alternative(score); + } + + template + T get() const { + return std::get(score); + } + + template + decltype(auto) visit(F&& f) const { + return std::visit(std::forward(f), score); + } + + private: + std::variant score; +}; + +} + +#endif // #ifndef SCORE_H_INCLUDED diff --git a/src/search.cpp b/src/search.cpp index 6785ba4c..6830e4b1 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,560 +16,553 @@ along with this program. If not, see . */ +#include "search.h" + #include +#include +#include #include #include -#include // For std::memset -#include -#include +#include +#include +#include +#include +#include #include "evaluate.h" #include "misc.h" #include "movegen.h" #include "movepick.h" +#include "nnue/network.h" +#include "nnue/nnue_accumulator.h" +#include "nnue/nnue_common.h" +#include "nnue/nnue_misc.h" #include "position.h" -#include "search.h" +#include "syzygy/tbprobe.h" #include "thread.h" #include "timeman.h" #include "tt.h" #include "uci.h" -#include "syzygy/tbprobe.h" +#include "ucioption.h" namespace Stockfish { -namespace Search { - - LimitsType Limits; -} - -namespace Tablebases { - - int Cardinality; - bool RootInTB; - bool UseRule50; - Depth ProbeDepth; -} - namespace TB = Tablebases; -using std::string; using Eval::evaluate; using namespace Search; namespace { - // Different node types, used as a template parameter - enum NodeType { NonPV, PV, Root }; +static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, + 0.942, 0.933, 0.890, 0.984, 0.941}; - // Futility margin - Value futility_margin(Depth d, bool improving) { - return Value(168 * (d - improving)); - } +// Futility margin +Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { + Value futilityMult = 126 - 46 * noTtCutNode; + Value improvingDeduction = 58 * improving * futilityMult / 32; + Value worseningDeduction = (323 + 52 * improving) * oppWorsening * futilityMult / 1024; - // Reductions lookup table, initialized at startup - int Reductions[MAX_MOVES]; // [depth or moveNumber] + return futilityMult * d - improvingDeduction - worseningDeduction; +} - Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) { - int r = Reductions[d] * Reductions[mn]; - return (r + 1463 - int(delta) * 1024 / int(rootDelta)) / 1024 + (!i && r > 1010); - } +constexpr int futility_move_count(bool improving, Depth depth) { + return improving ? (3 + depth * depth) : (3 + depth * depth) / 2; +} - constexpr int futility_move_count(bool improving, Depth depth) { - return (3 + depth * depth) / (2 - improving); - } +// Add correctionHistory value to raw staticEval and guarantee evaluation does not hit the tablebase range +Value to_corrected_static_eval(Value v, const Worker& w, const Position& pos) { + auto cv = w.correctionHistory[pos.side_to_move()][pawn_structure_index(pos)]; + v += cv * std::abs(cv) / 7350; + return std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); +} - // History and stats update bonus, based on depth - int stat_bonus(Depth d) { - return std::min((9 * d + 270) * d - 311 , 2145); - } +// History and stats update bonus, based on depth +int stat_bonus(Depth d) { return std::clamp(208 * d - 297, 16, 1406); } - // Add a small random component to draw evaluations to avoid 3-fold blindness - Value value_draw(Thread* thisThread) { - return VALUE_DRAW + Value(2 * (thisThread->nodes & 1) - 1); - } +// History and stats update malus, based on depth +int stat_malus(Depth d) { return (d < 4 ? 520 * d - 312 : 1479); } - // Skill structure is used to implement strength limit. If we have an uci_elo then - // we convert it to a suitable fractional skill level using anchoring to CCRL Elo - // (goldfish 1.13 = 2000) and a fit through Ordo derived Elo for match (TC 60+0.6) - // results spanning a wide range of k values. - struct Skill { +// Add a small random component to draw evaluations to avoid 3-fold blindness +Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } + +// Skill structure is used to implement strength limit. If we have a UCI_Elo, +// we convert it to an appropriate skill level, anchored to the Stash engine. +// This method is based on a fit of the Elo results for games played between +// Stockfish at various skill levels and various versions of the Stash engine. +// Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately +// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2 +struct Skill { Skill(int skill_level, int uci_elo) { if (uci_elo) - level = std::clamp(std::pow((uci_elo - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0); + { + double e = double(uci_elo - 1320) / (3190 - 1320); + level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0); + } else level = double(skill_level); } bool enabled() const { return level < 20.0; } bool time_to_pick(Depth depth) const { return depth == 1 + int(level); } - Move pick_best(size_t multiPV); + Move pick_best(const RootMoves&, size_t multiPV); double level; - Move best = MOVE_NONE; - }; + Move best = Move::none(); +}; - template - Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); +Value value_to_tt(Value v, int ply); +Value value_from_tt(Value v, int ply, int r50c); +void update_pv(Move* pv, Move move, const Move* childPv); +void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); +void update_refutations(const Position& pos, Stack* ss, Search::Worker& workerThread, Move move); +void update_quiet_histories( + const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); +void update_quiet_stats( + const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus); +void update_all_stats(const Position& pos, + Stack* ss, + Search::Worker& workerThread, + Move bestMove, + Value bestValue, + Value beta, + Square prevSq, + Move* quietsSearched, + int quietCount, + Move* capturesSearched, + int captureCount, + Depth depth); - template - Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0); +} // namespace - Value value_to_tt(Value v, int ply); - Value value_from_tt(Value v, int ply, int r50c); - void update_pv(Move* pv, Move move, Move* childPv); - void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); - void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus); - void update_all_stats(const Position& pos, Stack* ss, Move bestMove, Value bestValue, Value beta, Square prevSq, - Move* quietsSearched, int quietCount, Move* capturesSearched, int captureCount, Depth depth); +Search::Worker::Worker(SharedState& sharedState, + std::unique_ptr sm, + size_t thread_id) : + // Unpack the SharedState struct into member variables + thread_idx(thread_id), + manager(std::move(sm)), + options(sharedState.options), + threads(sharedState.threads), + tt(sharedState.tt), + networks(sharedState.networks), + refreshTable(networks) { + clear(); +} - // perft() is our utility to verify move generation. All the leaf nodes up - // to the given depth are generated and counted, and the sum is returned. - template - uint64_t perft(Position& pos, Depth depth) { +void Search::Worker::start_searching() { - StateInfo st; - ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); - - uint64_t cnt, nodes = 0; - const bool leaf = (depth == 2); - - for (const auto& m : MoveList(pos)) + // Non-main threads go directly to iterative_deepening() + if (!is_mainthread()) { - if (Root && depth <= 1) - cnt = 1, nodes++; + iterative_deepening(); + return; + } + + main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options); + tt.new_search(); + + if (rootMoves.empty()) + { + rootMoves.emplace_back(Move::none()); + main_manager()->updates.onUpdateNoMoves( + {0, {rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW, rootPos}}); + } + else + { + threads.start_searching(); // start non-main threads + iterative_deepening(); // main thread start searching + } + + // When we reach the maximum depth, we can arrive here without a raise of + // threads.stop. However, if we are pondering or in an infinite search, + // the UCI protocol states that we shouldn't print the best move before the + // GUI sends a "stop" or "ponderhit" command. We therefore simply wait here + // until the GUI sends one of those commands. + while (!threads.stop && (main_manager()->ponder || limits.infinite)) + {} // Busy wait for a stop or a ponder reset + + // Stop the threads if not already stopped (also raise the stop if + // "ponderhit" just reset threads.ponder). + threads.stop = true; + + // Wait until all threads have finished + threads.wait_for_search_finished(); + + // When playing in 'nodes as time' mode, subtract the searched nodes from + // the available ones before exiting. + if (limits.npmsec) + main_manager()->tm.advance_nodes_time(limits.inc[rootPos.side_to_move()] + - threads.nodes_searched()); + + Worker* bestThread = this; + Skill skill = + Skill(options["Skill Level"], options["UCI_LimitStrength"] ? int(options["UCI_Elo"]) : 0); + + if (int(options["MultiPV"]) == 1 && !limits.depth && !limits.mate && !skill.enabled() + && rootMoves[0].pv[0] != Move::none()) + bestThread = threads.get_best_thread()->worker.get(); + + main_manager()->bestPreviousScore = bestThread->rootMoves[0].score; + main_manager()->bestPreviousAverageScore = bestThread->rootMoves[0].averageScore; + + // Send again PV info if we have a new best thread + if (bestThread != this) + main_manager()->pv(*bestThread, threads, tt, bestThread->completedDepth); + + std::string ponder; + + if (bestThread->rootMoves[0].pv.size() > 1 + || bestThread->rootMoves[0].extract_ponder_from_tt(tt, rootPos)) + ponder = UCIEngine::move(bestThread->rootMoves[0].pv[1], rootPos.is_chess960()); + + auto bestmove = UCIEngine::move(bestThread->rootMoves[0].pv[0], rootPos.is_chess960()); + main_manager()->updates.onBestmove(bestmove, ponder); +} + +// Main iterative deepening loop. It calls search() +// repeatedly with increasing depth until the allocated thinking time has been +// consumed, the user stops the search, or the maximum search depth is reached. +void Search::Worker::iterative_deepening() { + + SearchManager* mainThread = (is_mainthread() ? main_manager() : nullptr); + + Move pv[MAX_PLY + 1]; + + Depth lastBestMoveDepth = 0; + Value lastBestScore = -VALUE_INFINITE; + auto lastBestPV = std::vector{Move::none()}; + + Value alpha, beta; + Value bestValue = -VALUE_INFINITE; + Color us = rootPos.side_to_move(); + double timeReduction = 1, totBestMoveChanges = 0; + int delta, iterIdx = 0; + + // Allocate stack with extra size to allow access from (ss - 7) to (ss + 2): + // (ss - 7) is needed for update_continuation_histories(ss - 1) which accesses (ss - 6), + // (ss + 2) is needed for initialization of cutOffCnt and killers. + Stack stack[MAX_PLY + 10] = {}; + Stack* ss = stack + 7; + + for (int i = 7; i > 0; --i) + { + (ss - i)->continuationHistory = + &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel + (ss - i)->staticEval = VALUE_NONE; + } + + for (int i = 0; i <= MAX_PLY + 2; ++i) + (ss + i)->ply = i; + + ss->pv = pv; + + if (mainThread) + { + if (mainThread->bestPreviousScore == VALUE_INFINITE) + mainThread->iterValue.fill(VALUE_ZERO); else - { - pos.do_move(m, st); - cnt = leaf ? MoveList(pos).size() : perft(pos, depth - 1); - nodes += cnt; - pos.undo_move(m); - } - if (Root) - sync_cout << UCI::move(m, pos.is_chess960()) << ": " << cnt << sync_endl; + mainThread->iterValue.fill(mainThread->bestPreviousScore); } - return nodes; - } -} // namespace + size_t multiPV = size_t(options["MultiPV"]); + Skill skill(options["Skill Level"], options["UCI_LimitStrength"] ? int(options["UCI_Elo"]) : 0); + // When playing with strength handicap enable MultiPV search that we will + // use behind-the-scenes to retrieve a set of possible moves. + if (skill.enabled()) + multiPV = std::max(multiPV, size_t(4)); -/// Search::init() is called at startup to initialize various lookup tables + multiPV = std::min(multiPV, rootMoves.size()); -void Search::init() { + int searchAgainCounter = 0; - for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int((20.81 + std::log(Threads.size()) / 2) * std::log(i)); -} - - -/// Search::clear() resets search state to its initial value - -void Search::clear() { - - Threads.main()->wait_for_search_finished(); - - Time.availableNodes = 0; - TT.clear(); - Threads.clear(); - Tablebases::init(Options["SyzygyPath"]); // Free mapped files -} - - -/// MainThread::search() is started when the program receives the UCI 'go' -/// command. It searches from the root position and outputs the "bestmove". - -void MainThread::search() { - - if (Limits.perft) - { - nodes = perft(rootPos, Limits.perft); - sync_cout << "\nNodes searched: " << nodes << "\n" << sync_endl; - return; - } - - Color us = rootPos.side_to_move(); - Time.init(Limits, us, rootPos.game_ply()); - TT.new_search(); - - Eval::NNUE::verify(); - - if (rootMoves.empty()) - { - rootMoves.emplace_back(MOVE_NONE); - sync_cout << "info depth 0 score " - << UCI::value(rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW) - << sync_endl; - } - else - { - Threads.start_searching(); // start non-main threads - Thread::search(); // main thread start searching - } - - // When we reach the maximum depth, we can arrive here without a raise of - // Threads.stop. However, if we are pondering or in an infinite search, - // the UCI protocol states that we shouldn't print the best move before the - // GUI sends a "stop" or "ponderhit" command. We therefore simply wait here - // until the GUI sends one of those commands. - - while (!Threads.stop && (ponder || Limits.infinite)) - {} // Busy wait for a stop or a ponder reset - - // Stop the threads if not already stopped (also raise the stop if - // "ponderhit" just reset Threads.ponder). - Threads.stop = true; - - // Wait until all threads have finished - Threads.wait_for_search_finished(); - - // When playing in 'nodes as time' mode, subtract the searched nodes from - // the available ones before exiting. - if (Limits.npmsec) - Time.availableNodes += Limits.inc[us] - Threads.nodes_searched(); - - Thread* bestThread = this; - Skill skill = Skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); - - if ( int(Options["MultiPV"]) == 1 - && !Limits.depth - && !skill.enabled() - && rootMoves[0].pv[0] != MOVE_NONE) - bestThread = Threads.get_best_thread(); - - bestPreviousScore = bestThread->rootMoves[0].score; - bestPreviousAverageScore = bestThread->rootMoves[0].averageScore; - - // Send again PV info if we have a new best thread - if (bestThread != this) - sync_cout << UCI::pv(bestThread->rootPos, bestThread->completedDepth, -VALUE_INFINITE, VALUE_INFINITE) << sync_endl; - - sync_cout << "bestmove " << UCI::move(bestThread->rootMoves[0].pv[0], rootPos.is_chess960()); - - if (bestThread->rootMoves[0].pv.size() > 1 || bestThread->rootMoves[0].extract_ponder_from_tt(rootPos)) - std::cout << " ponder " << UCI::move(bestThread->rootMoves[0].pv[1], rootPos.is_chess960()); - - std::cout << sync_endl; -} - - -/// Thread::search() is the main iterative deepening loop. It calls search() -/// repeatedly with increasing depth until the allocated thinking time has been -/// consumed, the user stops the search, or the maximum search depth is reached. - -void Thread::search() { - - // To allow access to (ss-7) up to (ss+2), the stack must be oversized. - // The former is needed to allow update_continuation_histories(ss-1, ...), - // which accesses its argument at ss-6, also near the root. - // The latter is needed for statScore and killer initialization. - Stack stack[MAX_PLY+10], *ss = stack+7; - Move pv[MAX_PLY+1]; - Value alpha, beta, delta; - Move lastBestMove = MOVE_NONE; - Depth lastBestMoveDepth = 0; - MainThread* mainThread = (this == Threads.main() ? Threads.main() : nullptr); - double timeReduction = 1, totBestMoveChanges = 0; - Color us = rootPos.side_to_move(); - int iterIdx = 0; - - std::memset(ss-7, 0, 10 * sizeof(Stack)); - for (int i = 7; i > 0; i--) - (ss-i)->continuationHistory = &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel - - for (int i = 0; i <= MAX_PLY + 2; ++i) - (ss+i)->ply = i; - - ss->pv = pv; - - bestValue = delta = alpha = -VALUE_INFINITE; - beta = VALUE_INFINITE; - - if (mainThread) - { - if (mainThread->bestPreviousScore == VALUE_INFINITE) - for (int i = 0; i < 4; ++i) - mainThread->iterValue[i] = VALUE_ZERO; - else - for (int i = 0; i < 4; ++i) - mainThread->iterValue[i] = mainThread->bestPreviousScore; - } - - size_t multiPV = size_t(Options["MultiPV"]); - Skill skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); - - // When playing with strength handicap enable MultiPV search that we will - // use behind the scenes to retrieve a set of possible moves. - if (skill.enabled()) - multiPV = std::max(multiPV, (size_t)4); - - multiPV = std::min(multiPV, rootMoves.size()); - - complexityAverage.set(202, 1); - - trend = SCORE_ZERO; - optimism[ us] = Value(39); - optimism[~us] = -optimism[us]; - - int searchAgainCounter = 0; - - // Iterative deepening loop until requested to stop or the target depth is reached - while ( ++rootDepth < MAX_PLY - && !Threads.stop - && !(Limits.depth && mainThread && rootDepth > Limits.depth)) - { - // Age out PV variability metric - if (mainThread) - totBestMoveChanges /= 2; - - // Save the last iteration's scores before first PV line is searched and - // all the move scores except the (new) PV are set to -VALUE_INFINITE. - for (RootMove& rm : rootMoves) - rm.previousScore = rm.score; - - size_t pvFirst = 0; - pvLast = 0; - - if (!Threads.increaseDepth) - searchAgainCounter++; - - // MultiPV loop. We perform a full root search for each PV line - for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx) - { - if (pvIdx == pvLast) - { - pvFirst = pvLast; - for (pvLast++; pvLast < rootMoves.size(); pvLast++) - if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank) - break; - } - - // Reset UCI info selDepth for each depth and each PV line - selDepth = 0; - - // Reset aspiration window starting size - if (rootDepth >= 4) - { - Value prev = rootMoves[pvIdx].averageScore; - delta = Value(16) + int(prev) * prev / 19178; - alpha = std::max(prev - delta,-VALUE_INFINITE); - beta = std::min(prev + delta, VALUE_INFINITE); - - // Adjust trend and optimism based on root move's previousScore - int tr = sigmoid(prev, 3, 8, 90, 125, 1); - trend = (us == WHITE ? make_score(tr, tr / 2) - : -make_score(tr, tr / 2)); - - int opt = sigmoid(prev, 8, 17, 144, 13966, 183); - optimism[ us] = Value(opt); - optimism[~us] = -optimism[us]; - } - - // Start with a small aspiration window and, in the case of a fail - // high/low, re-search with a bigger window until we don't fail - // high/low anymore. - int failedHighCnt = 0; - while (true) - { - Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - searchAgainCounter); - bestValue = Stockfish::search(rootPos, ss, alpha, beta, adjustedDepth, false); - - // Bring the best move to the front. It is critical that sorting - // is done with a stable algorithm because all the values but the - // first and eventually the new best one are set to -VALUE_INFINITE - // and we want to keep the same order for all the moves except the - // new PV that goes to the front. Note that in case of MultiPV - // search the already searched PV lines are preserved. - std::stable_sort(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast); - - // If search has been stopped, we break immediately. Sorting is - // safe because RootMoves is still valid, although it refers to - // the previous iteration. - if (Threads.stop) - break; - - // When failing high/low give some update (without cluttering - // the UI) before a re-search. - if ( mainThread - && multiPV == 1 - && (bestValue <= alpha || bestValue >= beta) - && Time.elapsed() > 3000) - sync_cout << UCI::pv(rootPos, rootDepth, alpha, beta) << sync_endl; - - // In case of failing low/high increase aspiration window and - // re-search, otherwise exit the loop. - if (bestValue <= alpha) - { - beta = (alpha + beta) / 2; - alpha = std::max(bestValue - delta, -VALUE_INFINITE); - - failedHighCnt = 0; - if (mainThread) - mainThread->stopOnPonderhit = false; - } - else if (bestValue >= beta) - { - beta = std::min(bestValue + delta, VALUE_INFINITE); - ++failedHighCnt; - } - else - break; - - delta += delta / 4 + 2; - - assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); - } - - // Sort the PV lines searched so far and update the GUI - std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); - - if ( mainThread - && (Threads.stop || pvIdx + 1 == multiPV || Time.elapsed() > 3000)) - sync_cout << UCI::pv(rootPos, rootDepth, alpha, beta) << sync_endl; - } - - if (!Threads.stop) - completedDepth = rootDepth; - - if (rootMoves[0].pv[0] != lastBestMove) { - lastBestMove = rootMoves[0].pv[0]; - lastBestMoveDepth = rootDepth; - } - - // Have we found a "mate in x"? - if ( Limits.mate - && bestValue >= VALUE_MATE_IN_MAX_PLY - && VALUE_MATE - bestValue <= 2 * Limits.mate) - Threads.stop = true; - - if (!mainThread) - continue; - - // If skill level is enabled and time is up, pick a sub-optimal best move - if (skill.enabled() && skill.time_to_pick(rootDepth)) - skill.pick_best(multiPV); - - // Use part of the gained time from a previous stable move for the current move - for (Thread* th : Threads) - { - totBestMoveChanges += th->bestMoveChanges; - th->bestMoveChanges = 0; - } - - // Do we have time for the next iteration? Can we stop searching now? - if ( Limits.use_time_management() - && !Threads.stop - && !mainThread->stopOnPonderhit) - { - double fallingEval = (69 + 12 * (mainThread->bestPreviousAverageScore - bestValue) - + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 781.4; - fallingEval = std::clamp(fallingEval, 0.5, 1.5); - - // If the bestMove is stable over several iterations, reduce time accordingly - timeReduction = lastBestMoveDepth + 10 < completedDepth ? 1.63 : 0.73; - double reduction = (1.56 + mainThread->previousTimeReduction) / (2.20 * timeReduction); - double bestMoveInstability = 1.073 + std::max(1.0, 2.25 - 9.9 / rootDepth) - * totBestMoveChanges / Threads.size(); - int complexity = mainThread->complexityAverage.value(); - double complexPosition = std::clamp(1.0 + (complexity - 326) / 1618.1, 0.5, 1.5); - - double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability * complexPosition; - - // Cap used time in case of a single legal move for a better viewer experience in tournaments - // yielding correct scores and sufficiently fast moves. - if (rootMoves.size() == 1) - totalTime = std::min(500.0, totalTime); - - // Stop the search if we have exceeded the totalTime - if (Time.elapsed() > totalTime) - { - // If we are allowed to ponder do not stop the search now but - // keep pondering until the GUI sends "ponderhit" or "stop". - if (mainThread->ponder) - mainThread->stopOnPonderhit = true; - else - Threads.stop = true; - } - else if ( Threads.increaseDepth - && !mainThread->ponder - && Time.elapsed() > totalTime * 0.43) - Threads.increaseDepth = false; - else - Threads.increaseDepth = true; - } - - mainThread->iterValue[iterIdx] = bestValue; - iterIdx = (iterIdx + 1) & 3; - } - - if (!mainThread) - return; - - mainThread->previousTimeReduction = timeReduction; - - // If skill level is enabled, swap best PV line with the sub-optimal one - if (skill.enabled()) - std::swap(rootMoves[0], *std::find(rootMoves.begin(), rootMoves.end(), - skill.best ? skill.best : skill.pick_best(multiPV))); -} - - -namespace { - - // search<>() is the main search function for both PV and non-PV nodes - - template - Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { - - constexpr bool PvNode = nodeType != NonPV; - constexpr bool rootNode = nodeType == Root; - const Depth maxNextDepth = rootNode ? depth : depth + 1; - - // Check if we have an upcoming move which draws by repetition, or - // if the opponent had an alternative move earlier to this position. - if ( !rootNode - && pos.rule50_count() >= 3 - && alpha < VALUE_DRAW - && pos.has_game_cycle(ss->ply)) + // Iterative deepening loop until requested to stop or the target depth is reached + while (++rootDepth < MAX_PLY && !threads.stop + && !(limits.depth && mainThread && rootDepth > limits.depth)) { - alpha = value_draw(pos.this_thread()); - if (alpha >= beta) - return alpha; + // Age out PV variability metric + if (mainThread) + totBestMoveChanges /= 2; + + // Save the last iteration's scores before the first PV line is searched and + // all the move scores except the (new) PV are set to -VALUE_INFINITE. + for (RootMove& rm : rootMoves) + rm.previousScore = rm.score; + + size_t pvFirst = 0; + pvLast = 0; + + if (!threads.increaseDepth) + searchAgainCounter++; + + // MultiPV loop. We perform a full root search for each PV line + for (pvIdx = 0; pvIdx < multiPV && !threads.stop; ++pvIdx) + { + if (pvIdx == pvLast) + { + pvFirst = pvLast; + for (pvLast++; pvLast < rootMoves.size(); pvLast++) + if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank) + break; + } + + // Reset UCI info selDepth for each depth and each PV line + selDepth = 0; + + // Reset aspiration window starting size + Value avg = rootMoves[pvIdx].averageScore; + delta = 10 + avg * avg / 9530; + alpha = std::max(avg - delta, -VALUE_INFINITE); + beta = std::min(avg + delta, VALUE_INFINITE); + + // Adjust optimism based on root move's averageScore (~4 Elo) + optimism[us] = 119 * avg / (std::abs(avg) + 88); + optimism[~us] = -optimism[us]; + + // Start with a small aspiration window and, in the case of a fail + // high/low, re-search with a bigger window until we don't fail + // high/low anymore. + int failedHighCnt = 0; + while (true) + { + // Adjust the effective depth searched, but ensure at least one effective increment + // for every four searchAgain steps (see issue #2717). + Depth adjustedDepth = + std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4); + bestValue = search(rootPos, ss, alpha, beta, adjustedDepth, false); + + // Bring the best move to the front. It is critical that sorting + // is done with a stable algorithm because all the values but the + // first and eventually the new best one is set to -VALUE_INFINITE + // and we want to keep the same order for all the moves except the + // new PV that goes to the front. Note that in the case of MultiPV + // search the already searched PV lines are preserved. + std::stable_sort(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast); + + // If search has been stopped, we break immediately. Sorting is + // safe because RootMoves is still valid, although it refers to + // the previous iteration. + if (threads.stop) + break; + + // When failing high/low give some update (without cluttering + // the UI) before a re-search. + if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta) + && elapsed() > 3000) + main_manager()->pv(*this, threads, tt, rootDepth); + + // In case of failing low/high increase aspiration window and + // re-search, otherwise exit the loop. + if (bestValue <= alpha) + { + beta = (alpha + beta) / 2; + alpha = std::max(bestValue - delta, -VALUE_INFINITE); + + failedHighCnt = 0; + if (mainThread) + mainThread->stopOnPonderhit = false; + } + else if (bestValue >= beta) + { + beta = std::min(bestValue + delta, VALUE_INFINITE); + ++failedHighCnt; + } + else + break; + + delta += delta / 3; + + assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); + } + + // Sort the PV lines searched so far and update the GUI + std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); + + if (mainThread + && (threads.stop || pvIdx + 1 == multiPV || elapsed() > 3000) + // A thread that aborted search can have mated-in/TB-loss PV and score + // that cannot be trusted, i.e. it can be delayed or refuted if we would have + // had time to fully search other root-moves. Thus we suppress this output and + // below pick a proven score/PV for this thread (from the previous iteration). + && !(threads.abortedSearch && rootMoves[0].uciScore <= VALUE_TB_LOSS_IN_MAX_PLY)) + main_manager()->pv(*this, threads, tt, rootDepth); + } + + if (!threads.stop) + completedDepth = rootDepth; + + // We make sure not to pick an unproven mated-in score, + // in case this thread prematurely stopped search (aborted-search). + if (threads.abortedSearch && rootMoves[0].score != -VALUE_INFINITE + && rootMoves[0].score <= VALUE_TB_LOSS_IN_MAX_PLY) + { + // Bring the last best move to the front for best thread selection. + Utility::move_to_front(rootMoves, [&lastBestPV = std::as_const(lastBestPV)]( + const auto& rm) { return rm == lastBestPV[0]; }); + rootMoves[0].pv = lastBestPV; + rootMoves[0].score = rootMoves[0].uciScore = lastBestScore; + } + else if (rootMoves[0].pv[0] != lastBestPV[0]) + { + lastBestPV = rootMoves[0].pv; + lastBestScore = rootMoves[0].score; + lastBestMoveDepth = rootDepth; + } + + if (!mainThread) + continue; + + // Have we found a "mate in x"? + if (limits.mate && rootMoves[0].score == rootMoves[0].uciScore + && ((rootMoves[0].score >= VALUE_MATE_IN_MAX_PLY + && VALUE_MATE - rootMoves[0].score <= 2 * limits.mate) + || (rootMoves[0].score != -VALUE_INFINITE + && rootMoves[0].score <= VALUE_MATED_IN_MAX_PLY + && VALUE_MATE + rootMoves[0].score <= 2 * limits.mate))) + threads.stop = true; + + // If the skill level is enabled and time is up, pick a sub-optimal best move + if (skill.enabled() && skill.time_to_pick(rootDepth)) + skill.pick_best(rootMoves, multiPV); + + // Use part of the gained time from a previous stable move for the current move + for (Thread* th : threads) + { + totBestMoveChanges += th->worker->bestMoveChanges; + th->worker->bestMoveChanges = 0; + } + + // Do we have time for the next iteration? Can we stop searching now? + if (limits.use_time_management() && !threads.stop && !mainThread->stopOnPonderhit) + { + int nodesEffort = rootMoves[0].effort * 100 / std::max(size_t(1), size_t(nodes)); + + double fallingEval = (1067 + 223 * (mainThread->bestPreviousAverageScore - bestValue) + + 97 * (mainThread->iterValue[iterIdx] - bestValue)) + / 10000.0; + fallingEval = std::clamp(fallingEval, 0.580, 1.667); + + // If the bestMove is stable over several iterations, reduce time accordingly + timeReduction = lastBestMoveDepth + 8 < completedDepth ? 1.495 : 0.687; + double reduction = (1.48 + mainThread->previousTimeReduction) / (2.17 * timeReduction); + double bestMoveInstability = 1 + 1.88 * totBestMoveChanges / threads.size(); + int el = std::clamp((bestValue + 750) / 150, 0, 9); + double recapture = limits.capSq == rootMoves[0].pv[0].to_sq() ? 0.955 : 1.005; + + double totalTime = mainThread->tm.optimum() * fallingEval * reduction + * bestMoveInstability * EvalLevel[el] * recapture; + + // Cap used time in case of a single legal move for a better viewer experience + if (rootMoves.size() == 1) + totalTime = std::min(500.0, totalTime); + + auto elapsedTime = elapsed(); + + if (completedDepth >= 10 && nodesEffort >= 97 && elapsedTime > totalTime * 0.739 + && !mainThread->ponder) + threads.stop = true; + + // Stop the search if we have exceeded the totalTime + if (elapsedTime > totalTime) + { + // If we are allowed to ponder do not stop the search now but + // keep pondering until the GUI sends "ponderhit" or "stop". + if (mainThread->ponder) + mainThread->stopOnPonderhit = true; + else + threads.stop = true; + } + else + threads.increaseDepth = mainThread->ponder || elapsedTime <= totalTime * 0.506; + } + + mainThread->iterValue[iterIdx] = bestValue; + iterIdx = (iterIdx + 1) & 3; } + if (!mainThread) + return; + + mainThread->previousTimeReduction = timeReduction; + + // If the skill level is enabled, swap the best PV line with the sub-optimal one + if (skill.enabled()) + std::swap(rootMoves[0], + *std::find(rootMoves.begin(), rootMoves.end(), + skill.best ? skill.best : skill.pick_best(rootMoves, multiPV))); +} + +void Search::Worker::clear() { + counterMoves.fill(Move::none()); + mainHistory.fill(0); + captureHistory.fill(0); + pawnHistory.fill(0); + correctionHistory.fill(0); + + for (bool inCheck : {false, true}) + for (StatsType c : {NoCaptures, Captures}) + for (auto& to : continuationHistory[inCheck][c]) + for (auto& h : to) + h->fill(-60); + + for (size_t i = 1; i < reductions.size(); ++i) + reductions[i] = int((18.93 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + + refreshTable.clear(networks); +} + + +// Main search function for both PV and non-PV nodes. +template +Value Search::Worker::search( + Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { + + constexpr bool PvNode = nodeType != NonPV; + constexpr bool rootNode = nodeType == Root; + // Dive into quiescence search when the depth reaches zero if (depth <= 0) - return qsearch(pos, ss, alpha, beta); + return qsearch < PvNode ? PV : NonPV > (pos, ss, alpha, beta); + + // Check if we have an upcoming move that draws by repetition, or + // if the opponent had an alternative move earlier to this position. + if (!rootNode && alpha < VALUE_DRAW && pos.has_game_cycle(ss->ply)) + { + alpha = value_draw(this->nodes); + if (alpha >= beta) + return alpha; + } assert(-VALUE_INFINITE <= alpha && alpha < beta && beta <= VALUE_INFINITE); assert(PvNode || (alpha == beta - 1)); assert(0 < depth && depth < MAX_PLY); assert(!(PvNode && cutNode)); - Move pv[MAX_PLY+1], capturesSearched[32], quietsSearched[64]; + Move pv[MAX_PLY + 1], capturesSearched[32], quietsSearched[32]; StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); TTEntry* tte; - Key posKey; - Move ttMove, move, excludedMove, bestMove; - Depth extension, newDepth; - Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool givesCheck, improving, didLMR, priorCapture; - bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture; - Piece movedPiece; - int moveCount, captureCount, quietCount, bestMoveCount, improvement, complexity; + Key posKey; + Move ttMove, move, excludedMove, bestMove; + Depth extension, newDepth; + Value bestValue, value, ttValue, eval, maxValue, probCutBeta; + bool givesCheck, improving, priorCapture, opponentWorsening; + bool capture, moveCountPruning, ttCapture; + Piece movedPiece; + int moveCount, captureCount, quietCount; // Step 1. Initialize node - Thread* thisThread = pos.this_thread(); + Worker* thisThread = this; ss->inCheck = pos.checkers(); priorCapture = pos.captured_piece(); Color us = pos.side_to_move(); - moveCount = bestMoveCount = captureCount = quietCount = ss->moveCount = 0; - bestValue = -VALUE_INFINITE; - maxValue = VALUE_INFINITE; + moveCount = captureCount = quietCount = ss->moveCount = 0; + bestValue = -VALUE_INFINITE; + maxValue = VALUE_INFINITE; // Check for the available remaining time - if (thisThread == Threads.main()) - static_cast(thisThread)->check_time(); + if (is_mainthread()) + main_manager()->check_time(*thisThread); // Used to send selDepth info to GUI (selDepth counts from 1, ply from 0) if (PvNode && thisThread->selDepth < ss->ply + 1) @@ -578,20 +571,20 @@ namespace { if (!rootNode) { // Step 2. Check for aborted search and immediate draw - if ( Threads.stop.load(std::memory_order_relaxed) - || pos.is_draw(ss->ply) + if (threads.stop.load(std::memory_order_relaxed) || pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) - return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) - : value_draw(pos.this_thread()); + return (ss->ply >= MAX_PLY && !ss->inCheck) + ? evaluate(networks, pos, refreshTable, thisThread->optimism[us]) + : value_draw(thisThread->nodes); // Step 3. Mate distance pruning. Even if we mate at the next move our score - // would be at best mate_in(ss->ply+1), but if alpha is already bigger because + // would be at best mate_in(ss->ply + 1), but if alpha is already bigger because // a shorter mate was found upward in the tree then there is no need to search // because we will never beat the current alpha. Same logic but with reversed - // signs applies also in the opposite condition of being mated instead of giving - // mate. In this case return a fail-high score. + // signs apply also in the opposite condition of being mated instead of giving + // mate. In this case, return a fail-high score. alpha = std::max(mated_in(ss->ply), alpha); - beta = std::min(mate_in(ss->ply+1), beta); + beta = std::min(mate_in(ss->ply + 1), beta); if (alpha >= beta) return alpha; } @@ -600,107 +593,92 @@ namespace { assert(0 <= ss->ply && ss->ply < MAX_PLY); - (ss+1)->ttPv = false; - (ss+1)->excludedMove = bestMove = MOVE_NONE; - (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; - ss->doubleExtensions = (ss-1)->doubleExtensions; - ss->depth = depth; - Square prevSq = to_sq((ss-1)->currentMove); + bestMove = Move::none(); + (ss + 2)->killers[0] = (ss + 2)->killers[1] = Move::none(); + (ss + 2)->cutoffCnt = 0; + Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; + ss->statScore = 0; - // Initialize statScore to zero for the grandchildren of the current position. - // So statScore is shared between all grandchildren and only the first grandchild - // starts with statScore = 0. Later grandchildren start with the last calculated - // statScore of the previous grandchild. This influences the reduction rules in - // LMR which are based on the statScore of parent position. - if (!rootNode) - (ss+2)->statScore = 0; - - // Step 4. Transposition table lookup. We don't want the score of a partial - // search to overwrite a previous full search TT value, so we use a different - // position key in case of an excluded move. + // Step 4. Transposition table lookup. excludedMove = ss->excludedMove; - posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); - tte = TT.probe(posKey, ss->ttHit); - ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] - : ss->ttHit ? tte->move() : MOVE_NONE; - ttCapture = ttMove && pos.capture_or_promotion(ttMove); + posKey = pos.key(); + tte = tt.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] + : ss->ttHit ? tte->move() + : Move::none(); + ttCapture = ttMove && pos.capture_stage(ttMove); + + // At this point, if excluded, skip straight to step 6, static eval. However, + // to save indentation, we list the condition in all code between here and there. if (!excludedMove) ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); // At non-PV nodes we check for an early TT cutoff - if ( !PvNode - && ss->ttHit - && tte->depth() > depth - (thisThread->id() % 2 == 1) - && ttValue != VALUE_NONE // Possible in case of TT access race - && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) - : (tte->bound() & BOUND_UPPER))) + if (!PvNode && !excludedMove && tte->depth() > depth + && ttValue != VALUE_NONE // Possible in case of TT access race or if !ttHit + && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) { - // If ttMove is quiet, update move sorting heuristics on TT hit (~1 Elo) - if (ttMove) + // If ttMove is quiet, update move sorting heuristics on TT hit (~2 Elo) + if (ttMove && ttValue >= beta) { - if (ttValue >= beta) - { - // Bonus for a quiet ttMove that fails high (~3 Elo) - if (!ttCapture) - update_quiet_stats(pos, ss, ttMove, stat_bonus(depth)); + // Bonus for a quiet ttMove that fails high (~2 Elo) + if (!ttCapture) + update_quiet_stats(pos, ss, *this, ttMove, stat_bonus(depth)); - // Extra penalty for early quiet moves of the previous ply (~0 Elo) - if ((ss-1)->moveCount <= 2 && !priorCapture) - update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -stat_bonus(depth + 1)); - } - // Penalty for a quiet ttMove that fails low (~1 Elo) - else if (!ttCapture) - { - int penalty = -stat_bonus(depth); - thisThread->mainHistory[us][from_to(ttMove)] << penalty; - update_continuation_histories(ss, pos.moved_piece(ttMove), to_sq(ttMove), penalty); - } + // Extra penalty for early quiet moves of + // the previous ply (~1 Elo on STC, ~2 Elo on LTC) + if (prevSq != SQ_NONE && (ss - 1)->moveCount <= 2 && !priorCapture) + update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, + -stat_malus(depth + 1)); } // Partial workaround for the graph history interaction problem // For high rule50 counts don't produce transposition table cutoffs. if (pos.rule50_count() < 90) - return ttValue; + return ttValue >= beta && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY + ? (ttValue * 3 + beta) / 4 + : ttValue; } // Step 5. Tablebases probe - if (!rootNode && TB::Cardinality) + if (!rootNode && !excludedMove && tbConfig.cardinality) { int piecesCount = pos.count(); - if ( piecesCount <= TB::Cardinality - && (piecesCount < TB::Cardinality || depth >= TB::ProbeDepth) - && pos.rule50_count() == 0 - && !pos.can_castle(ANY_CASTLING)) + if (piecesCount <= tbConfig.cardinality + && (piecesCount < tbConfig.cardinality || depth >= tbConfig.probeDepth) + && pos.rule50_count() == 0 && !pos.can_castle(ANY_CASTLING)) { TB::ProbeState err; - TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err); + TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err); // Force check of time on the next occasion - if (thisThread == Threads.main()) - static_cast(thisThread)->callsCnt = 0; + if (is_mainthread()) + main_manager()->callsCnt = 0; if (err != TB::ProbeState::FAIL) { thisThread->tbHits.fetch_add(1, std::memory_order_relaxed); - int drawScore = TB::UseRule50 ? 1 : 0; + int drawScore = tbConfig.useRule50 ? 1 : 0; - // use the range VALUE_MATE_IN_MAX_PLY to VALUE_TB_WIN_IN_MAX_PLY to score - value = wdl < -drawScore ? VALUE_MATED_IN_MAX_PLY + ss->ply + 1 - : wdl > drawScore ? VALUE_MATE_IN_MAX_PLY - ss->ply - 1 - : VALUE_DRAW + 2 * wdl * drawScore; + Value tbValue = VALUE_TB - ss->ply; - Bound b = wdl < -drawScore ? BOUND_UPPER - : wdl > drawScore ? BOUND_LOWER : BOUND_EXACT; + // use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score + value = wdl < -drawScore ? -tbValue + : wdl > drawScore ? tbValue + : VALUE_DRAW + 2 * wdl * drawScore; - if ( b == BOUND_EXACT - || (b == BOUND_LOWER ? value >= beta : value <= alpha)) + Bound b = wdl < -drawScore ? BOUND_UPPER + : wdl > drawScore ? BOUND_LOWER + : BOUND_EXACT; + + if (b == BOUND_EXACT || (b == BOUND_LOWER ? value >= beta : value <= alpha)) { tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, - std::min(MAX_PLY - 1, depth + 6), - MOVE_NONE, VALUE_NONE); + std::min(MAX_PLY - 1, depth + 6), Move::none(), VALUE_NONE, + tt.generation()); return value; } @@ -716,126 +694,122 @@ namespace { } } - CapturePieceToHistory& captureHistory = thisThread->captureHistory; - // Step 6. Static evaluation of the position + Value unadjustedStaticEval = VALUE_NONE; if (ss->inCheck) { // Skip early pruning when in check ss->staticEval = eval = VALUE_NONE; - improving = false; - improvement = 0; - complexity = 0; + improving = false; goto moves_loop; } + else if (excludedMove) + { + // Providing the hint that this node's accumulator will be used often + // brings significant Elo gain (~13 Elo). + Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable); + unadjustedStaticEval = eval = ss->staticEval; + } else if (ss->ttHit) { // Never assume anything about values stored in TT - ss->staticEval = eval = tte->eval(); - if (eval == VALUE_NONE) - ss->staticEval = eval = evaluate(pos); + unadjustedStaticEval = tte->eval(); + if (unadjustedStaticEval == VALUE_NONE) + unadjustedStaticEval = evaluate(networks, pos, refreshTable, thisThread->optimism[us]); + else if (PvNode) + Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable); - // Randomize draw evaluation - if (eval == VALUE_DRAW) - eval = value_draw(thisThread); + ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); - // ttValue can be used as a better position evaluation (~4 Elo) - if ( ttValue != VALUE_NONE - && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER))) + // ttValue can be used as a better position evaluation (~7 Elo) + if (ttValue != VALUE_NONE && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER))) eval = ttValue; } else { - ss->staticEval = eval = evaluate(pos); + unadjustedStaticEval = evaluate(networks, pos, refreshTable, thisThread->optimism[us]); + ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); - // Save static evaluation into transposition table - if (!excludedMove) - tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); + // Static evaluation is saved as it was before adjustment by correction history + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, Move::none(), + unadjustedStaticEval, tt.generation()); } - // Use static evaluation difference to improve quiet move ordering (~3 Elo) - if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture) + // Use static evaluation difference to improve quiet move ordering (~9 Elo) + if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-16 * int((ss-1)->staticEval + ss->staticEval), -2000, 2000); - thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus; + int bonus = std::clamp(-13 * int((ss - 1)->staticEval + ss->staticEval), -1796, 1526); + bonus = bonus > 0 ? 2 * bonus : bonus / 2; + thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] << bonus; + if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) + thisThread->pawnHistory[pawn_structure_index(pos)][pos.piece_on(prevSq)][prevSq] + << bonus / 2; } - // Set up the improvement variable, which is the difference between the current - // static evaluation and the previous static evaluation at our turn (if we were - // in check at our previous move we look at the move prior to it). The improvement - // margin and the improving flag are used in various pruning heuristics. - improvement = (ss-2)->staticEval != VALUE_NONE ? ss->staticEval - (ss-2)->staticEval - : (ss-4)->staticEval != VALUE_NONE ? ss->staticEval - (ss-4)->staticEval - : 175; + // Set up the improving flag, which is true if current static evaluation is + // bigger than the previous static evaluation at our turn (if we were in + // check at our previous move we look at static evaluation at move prior to it + // and if we were in check at move prior to it flag is set to true) and is + // false otherwise. The improving flag is used in various pruning heuristics. + improving = (ss - 2)->staticEval != VALUE_NONE + ? ss->staticEval > (ss - 2)->staticEval + : (ss - 4)->staticEval != VALUE_NONE && ss->staticEval > (ss - 4)->staticEval; - improving = improvement > 0; - complexity = abs(ss->staticEval - (us == WHITE ? eg_value(pos.psq_score()) : -eg_value(pos.psq_score()))); + opponentWorsening = ss->staticEval + (ss - 1)->staticEval > 2; - thisThread->complexityAverage.update(complexity); - - // Step 7. Razoring. + // Step 7. Razoring (~1 Elo) // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. - if ( !PvNode - && depth <= 7 - && eval < alpha - 348 - 258 * depth * depth) + // Adjust razor margin according to cutoffCnt. (~1 Elo) + if (eval < alpha - 433 - (302 - 141 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) return value; } - // Step 8. Futility pruning: child node (~25 Elo). + // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. - if ( !ss->ttPv - && depth < 8 - && eval - futility_margin(depth, improving) - (ss-1)->statScore / 256 >= beta - && eval >= beta - && eval < 26305) // larger than VALUE_KNOWN_WIN, but smaller than TB wins. - return eval; + if (!ss->ttPv && depth < 11 + && eval - futility_margin(depth, cutNode && !ss->ttHit, improving, opponentWorsening) + - (ss - 1)->statScore / 254 + >= beta + && eval >= beta && eval < VALUE_TB_WIN_IN_MAX_PLY && (!ttMove || ttCapture)) + return beta > VALUE_TB_LOSS_IN_MAX_PLY ? (eval + beta) / 2 : eval; - // Step 9. Null move search with verification search (~22 Elo) - if ( !PvNode - && (ss-1)->currentMove != MOVE_NULL - && (ss-1)->statScore < 14695 - && eval >= beta - && eval >= ss->staticEval - && ss->staticEval >= beta - 15 * depth - improvement / 15 + 198 + complexity / 28 - && !excludedMove - && pos.non_pawn_material(us) - && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) + // Step 9. Null move search with verification search (~35 Elo) + if (!PvNode && (ss - 1)->currentMove != Move::null() && (ss - 1)->statScore < 16993 + && eval >= beta && ss->staticEval >= beta - 19 * depth + 326 && !excludedMove + && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly + && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); - // Null move dynamic reduction based on depth, eval and complexity of position - Depth R = std::min(int(eval - beta) / 147, 5) + depth / 3 + 4 - (complexity > 753); + // Null move dynamic reduction based on depth and eval + Depth R = std::min(int(eval - beta) / 134, 6) + depth / 3 + 4; - ss->currentMove = MOVE_NULL; + ss->currentMove = Move::null(); ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; - pos.do_null_move(st); + pos.do_null_move(st, tt); - Value nullValue = -search(pos, ss+1, -beta, -beta+1, depth-R, !cutNode); + Value nullValue = -search(pos, ss + 1, -beta, -beta + 1, depth - R, !cutNode); pos.undo_null_move(); - if (nullValue >= beta) + // Do not return unproven mate or TB scores + if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY) { - // Do not return unproven mate or TB scores - if (nullValue >= VALUE_TB_WIN_IN_MAX_PLY) - nullValue = beta; - - if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 14)) + if (thisThread->nmpMinPly || depth < 16) return nullValue; - assert(!thisThread->nmpMinPly); // Recursive verification is not allowed + assert(!thisThread->nmpMinPly); // Recursive verification is not allowed // Do verification search at high depths, with null move pruning disabled - // for us, until ply exceeds nmpMinPly. - thisThread->nmpMinPly = ss->ply + 3 * (depth-R) / 4; - thisThread->nmpColor = us; + // until ply exceeds nmpMinPly. + thisThread->nmpMinPly = ss->ply + 3 * (depth - R) / 4; - Value v = search(pos, ss, beta-1, beta, depth-R, false); + Value v = search(pos, ss, beta - 1, beta, depth - R, false); thisThread->nmpMinPly = 0; @@ -844,479 +818,499 @@ namespace { } } - probCutBeta = beta + 179 - 46 * improving; + // Step 10. Internal iterative reductions (~9 Elo) + // For PV nodes without a ttMove, we decrease depth by 3. + if (PvNode && !ttMove) + depth -= 3; - // Step 10. ProbCut (~4 Elo) - // If we have a good enough capture and a reduced search returns a value + // Use qsearch if depth <= 0. + if (depth <= 0) + return qsearch(pos, ss, alpha, beta); + + // For cutNodes without a ttMove, we decrease depth by 2 if depth is high enough. + if (cutNode && depth >= 8 && !ttMove) + depth -= 2; + + // Step 11. ProbCut (~10 Elo) + // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. - if ( !PvNode - && depth > 4 - && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY - // if value from transposition table is lower than probCutBeta, don't attempt probCut - // there and in further interactions with transposition table cutoff depth is set to depth - 3 - // because probCut search has depth set to depth - 4 but we also do a move before it - // so effective depth is equal to depth - 3 - && !( ss->ttHit - && tte->depth() >= depth - 3 - && ttValue != VALUE_NONE - && ttValue < probCutBeta)) + probCutBeta = beta + 159 - 66 * improving; + if ( + !PvNode && depth > 3 + && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY + // If value from transposition table is lower than probCutBeta, don't attempt probCut + // there and in further interactions with transposition table cutoff depth is set to depth - 3 + // because probCut search has depth set to depth - 4 but we also do a move before it + // So effective depth is equal to depth - 3 + && !(tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { - assert(probCutBeta < VALUE_INFINITE); + assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta); - MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); - bool ttPv = ss->ttPv; - ss->ttPv = false; + MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &thisThread->captureHistory); - while ((move = mp.next_move()) != MOVE_NONE) + while ((move = mp.next_move()) != Move::none()) if (move != excludedMove && pos.legal(move)) { - assert(pos.capture_or_promotion(move)); + assert(pos.capture_stage(move)); - captureOrPromotion = true; + // Prefetch the TT entry for the resulting position + prefetch(tt.first_entry(pos.key_after(move))); ss->currentMove = move; - ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [captureOrPromotion] - [pos.moved_piece(move)] - [to_sq(move)]; + ss->continuationHistory = + &this + ->continuationHistory[ss->inCheck][true][pos.moved_piece(move)][move.to_sq()]; + thisThread->nodes.fetch_add(1, std::memory_order_relaxed); pos.do_move(move, st); // Perform a preliminary qsearch to verify that the move holds - value = -qsearch(pos, ss+1, -probCutBeta, -probCutBeta+1); + value = -qsearch(pos, ss + 1, -probCutBeta, -probCutBeta + 1); // If the qsearch held, perform the regular search if (value >= probCutBeta) - value = -search(pos, ss+1, -probCutBeta, -probCutBeta+1, depth - 4, !cutNode); + value = -search(pos, ss + 1, -probCutBeta, -probCutBeta + 1, depth - 4, + !cutNode); pos.undo_move(move); if (value >= probCutBeta) { - // if transposition table doesn't have equal or more deep info write probCut data into it - if ( !(ss->ttHit - && tte->depth() >= depth - 3 - && ttValue != VALUE_NONE)) - tte->save(posKey, value_to_tt(value, ss->ply), ttPv, - BOUND_LOWER, - depth - 3, move, ss->staticEval); - return value; + // Save ProbCut data into transposition table + tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, depth - 3, + move, unadjustedStaticEval, tt.generation()); + return std::abs(value) < VALUE_TB_WIN_IN_MAX_PLY ? value - (probCutBeta - beta) + : value; } } - ss->ttPv = ttPv; + + Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable); } - // Step 11. If the position is not in TT, decrease depth by 2 or 1 depending on node type (~3 Elo) - if ( PvNode - && depth >= 3 - && !ttMove) - depth -= 2; +moves_loop: // When in check, search starts here - if ( cutNode - && depth >= 8 - && !ttMove) - depth--; - -moves_loop: // When in check, search starts here - - // Step 12. A small Probcut idea, when we are in check (~0 Elo) - probCutBeta = beta + 481; - if ( ss->inCheck - && !PvNode - && depth >= 2 - && ttCapture - && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 3 - && ttValue >= probCutBeta - && abs(ttValue) <= VALUE_KNOWN_WIN - && abs(beta) <= VALUE_KNOWN_WIN - ) + // Step 12. A small Probcut idea, when we are in check (~4 Elo) + probCutBeta = beta + 420; + if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER) + && tte->depth() >= depth - 4 && ttValue >= probCutBeta + && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) return probCutBeta; + const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, + (ss - 2)->continuationHistory, + (ss - 3)->continuationHistory, + (ss - 4)->continuationHistory, + nullptr, + (ss - 6)->continuationHistory}; - const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, - nullptr , (ss-4)->continuationHistory, - nullptr , (ss-6)->continuationHistory }; + Move countermove = + prevSq != SQ_NONE ? thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] : Move::none(); - Move countermove = thisThread->counterMoves[pos.piece_on(prevSq)][prevSq]; + MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, + contHist, &thisThread->pawnHistory, countermove, ss->killers); - MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, - &captureHistory, - contHist, - countermove, - ss->killers); - - value = bestValue; + value = bestValue; moveCountPruning = false; - // Indicate PvNodes that will probably fail low if the node was searched - // at a depth equal or greater than the current depth, and the result of this search was a fail low. - bool likelyFailLow = PvNode - && ttMove - && (tte->bound() & BOUND_UPPER) - && tte->depth() >= depth; - // Step 13. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. - while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) + while ((move = mp.next_move(moveCountPruning)) != Move::none()) { - assert(is_ok(move)); + assert(move.is_ok()); - if (move == excludedMove) - continue; + if (move == excludedMove) + continue; - // At root obey the "searchmoves" option and skip moves not listed in Root - // Move List. As a consequence any illegal move is also skipped. In MultiPV - // mode we also skip PV moves which have been already searched and those - // of lower "TB rank" if we are in a TB root position. - if (rootNode && !std::count(thisThread->rootMoves.begin() + thisThread->pvIdx, - thisThread->rootMoves.begin() + thisThread->pvLast, move)) - continue; + // Check for legality + if (!pos.legal(move)) + continue; - // Check for legality - if (!rootNode && !pos.legal(move)) - continue; + // At root obey the "searchmoves" option and skip moves not listed in Root + // Move List. In MultiPV mode we also skip PV moves that have been already + // searched and those of lower "TB rank" if we are in a TB root position. + if (rootNode + && !std::count(thisThread->rootMoves.begin() + thisThread->pvIdx, + thisThread->rootMoves.begin() + thisThread->pvLast, move)) + continue; - ss->moveCount = ++moveCount; + ss->moveCount = ++moveCount; - if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000) - sync_cout << "info depth " << depth - << " currmove " << UCI::move(move, pos.is_chess960()) - << " currmovenumber " << moveCount + thisThread->pvIdx << sync_endl; - if (PvNode) - (ss+1)->pv = nullptr; + if (rootNode && is_mainthread() && elapsed() > 3000) + { + main_manager()->updates.onIter( + {depth, UCIEngine::move(move, pos.is_chess960()), moveCount + thisThread->pvIdx}); + } + if (PvNode) + (ss + 1)->pv = nullptr; - extension = 0; - captureOrPromotion = pos.capture_or_promotion(move); - movedPiece = pos.moved_piece(move); - givesCheck = pos.gives_check(move); + extension = 0; + capture = pos.capture_stage(move); + movedPiece = pos.moved_piece(move); + givesCheck = pos.gives_check(move); - // Calculate new depth for this move - newDepth = depth - 1; + // Calculate new depth for this move + newDepth = depth - 1; - Value delta = beta - alpha; + int delta = beta - alpha; - // Step 14. Pruning at shallow depth (~98 Elo). Depth conditions are important for mate finding. - if ( !rootNode - && pos.non_pawn_material(us) - && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) - { - // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~7 Elo) - moveCountPruning = moveCount >= futility_move_count(improving, depth); + Depth r = reduction(improving, depth, moveCount, delta); - // Reduced depth of the next LMR search - int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount, delta, thisThread->rootDelta), 0); + // Step 14. Pruning at shallow depth (~120 Elo). + // Depth conditions are important for mate finding. + if (!rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) + { + // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) + if (!moveCountPruning) + moveCountPruning = moveCount >= futility_move_count(improving, depth); - if ( captureOrPromotion - || givesCheck) - { - // Futility pruning for captures (~0 Elo) - if ( !pos.empty(to_sq(move)) - && !givesCheck - && !PvNode - && lmrDepth < 6 - && !ss->inCheck - && ss->staticEval + 281 + 179 * lmrDepth + PieceValue[EG][pos.piece_on(to_sq(move))] - + captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] / 6 < alpha) - continue; + // Reduced depth of the next LMR search + int lmrDepth = newDepth - r; - // SEE based pruning (~9 Elo) - if (!pos.see_ge(move, Value(-203) * depth)) - continue; - } - else - { - int history = (*contHist[0])[movedPiece][to_sq(move)] - + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)]; + if (capture || givesCheck) + { + Piece capturedPiece = pos.piece_on(move.to_sq()); + int captHist = + thisThread->captureHistory[movedPiece][move.to_sq()][type_of(capturedPiece)]; - // Continuation history based pruning (~2 Elo) - if ( lmrDepth < 5 - && history < -3875 * (depth - 1)) - continue; + // Futility pruning for captures (~2 Elo) + if (!givesCheck && lmrDepth < 7 && !ss->inCheck) + { + Value futilityValue = ss->staticEval + 295 + 280 * lmrDepth + + PieceValue[capturedPiece] + captHist / 7; + if (futilityValue <= alpha) + continue; + } - history += thisThread->mainHistory[us][from_to(move)]; + // SEE based pruning for captures and checks (~11 Elo) + int seeHist = std::clamp(captHist / 32, -197 * depth, 196 * depth); + if (!pos.see_ge(move, -186 * depth - seeHist)) + continue; + } + else + { + int history = + (*contHist[0])[movedPiece][move.to_sq()] + + (*contHist[1])[movedPiece][move.to_sq()] + + (*contHist[3])[movedPiece][move.to_sq()] / 2 + + thisThread->pawnHistory[pawn_structure_index(pos)][movedPiece][move.to_sq()]; - // Futility pruning: parent node (~9 Elo) - if ( !ss->inCheck - && lmrDepth < 11 - && ss->staticEval + 122 + 138 * lmrDepth + history / 60 <= alpha) - continue; + // Continuation history based pruning (~2 Elo) + if (lmrDepth < 6 && history < -4081 * depth) + continue; - // Prune moves with negative SEE (~3 Elo) - if (!pos.see_ge(move, Value(-25 * lmrDepth * lmrDepth - 20 * lmrDepth))) - continue; - } - } + history += 2 * thisThread->mainHistory[us][move.from_to()]; - // Step 15. Extensions (~66 Elo) - // We take care to not overdo to avoid search getting stuck. - if (ss->ply < thisThread->rootDepth * 2) - { - // Singular extension search (~58 Elo). If all moves but one fail low on a - // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), - // then that move is singular and should be extended. To verify this we do - // a reduced search on all the other moves but the ttMove and if the - // result is lower than ttValue minus a margin, then we will extend the ttMove. - if ( !rootNode - && depth >= 4 + 2 * (PvNode && tte->is_pv()) - && move == ttMove - && !excludedMove // Avoid recursive singular search - /* && ttValue != VALUE_NONE Already implicit in the next condition */ - && abs(ttValue) < VALUE_KNOWN_WIN - && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 3) - { - Value singularBeta = ttValue - 3 * depth; - Depth singularDepth = (depth - 1) / 2; + lmrDepth += history / 4768; - ss->excludedMove = move; - value = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); - ss->excludedMove = MOVE_NONE; + Value futilityValue = + ss->staticEval + (bestValue < ss->staticEval - 52 ? 134 : 54) + 142 * lmrDepth; - if (value < singularBeta) - { - extension = 1; + // Futility pruning: parent node (~13 Elo) + if (!ss->inCheck && lmrDepth < 13 && futilityValue <= alpha) + { + if (bestValue <= futilityValue && std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY + && futilityValue < VALUE_TB_WIN_IN_MAX_PLY) + bestValue = (bestValue + futilityValue * 3) / 4; + continue; + } - // Avoid search explosion by limiting the number of double extensions - if ( !PvNode - && value < singularBeta - 26 - && ss->doubleExtensions <= 8) - extension = 2; - } + lmrDepth = std::max(lmrDepth, 0); - // Multi-cut pruning - // Our ttMove is assumed to fail high, and now we failed high also on a reduced - // search without the ttMove. So we assume this expected Cut-node is not singular, - // that multiple moves fail high, and we can prune the whole subtree by returning - // a soft bound. - else if (singularBeta >= beta) - return singularBeta; + // Prune moves with negative SEE (~4 Elo) + if (!pos.see_ge(move, -28 * lmrDepth * lmrDepth)) + continue; + } + } - // If the eval of ttMove is greater than beta, we reduce it (negative extension) - else if (ttValue >= beta) - extension = -2; - } + // Step 15. Extensions (~100 Elo) + // We take care to not overdo to avoid search getting stuck. + if (ss->ply < thisThread->rootDepth * 2) + { + // Singular extension search (~94 Elo). If all moves but one fail low on a + // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), + // then that move is singular and should be extended. To verify this we do + // a reduced search on the position excluding the ttMove and if the result + // is lower than ttValue minus a margin, then we will extend the ttMove. - // Check extensions (~1 Elo) - else if ( givesCheck - && depth > 9 - && abs(ss->staticEval) > 71) - extension = 1; + // Note: the depth margin and singularBeta margin are known for having non-linear + // scaling. Their values are optimized to time controls of 180+1.8 and longer + // so changing them requires tests at these types of time controls. + // Recursive singular search is avoided. + if (!rootNode && move == ttMove && !excludedMove + && depth >= 4 - (thisThread->completedDepth > 32) + ss->ttPv + && std::abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) + && tte->depth() >= depth - 3) + { + Value singularBeta = ttValue - (65 + 52 * (ss->ttPv && !PvNode)) * depth / 63; + Depth singularDepth = newDepth / 2; - // Quiet ttMove extensions (~0 Elo) - else if ( PvNode - && move == ttMove - && move == ss->killers[0] - && (*contHist[0])[movedPiece][to_sq(move)] >= 5491) - extension = 1; - } + ss->excludedMove = move; + value = + search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); + ss->excludedMove = Move::none(); - // Add extension to new depth - newDepth += extension; - ss->doubleExtensions = (ss-1)->doubleExtensions + (extension == 2); + if (value < singularBeta) + { + int doubleMargin = 251 * PvNode - 241 * !ttCapture; + int tripleMargin = + 135 + 234 * PvNode - 248 * !ttCapture + 124 * (ss->ttPv || !ttCapture); + int quadMargin = 447 + 354 * PvNode - 300 * !ttCapture + 206 * ss->ttPv; - // Speculative prefetch as early as possible - prefetch(TT.first_entry(pos.key_after(move))); + extension = 1 + (value < singularBeta - doubleMargin) + + (value < singularBeta - tripleMargin) + + (value < singularBeta - quadMargin); - // Update the current move (this must be done after singular extension search) - ss->currentMove = move; - ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [captureOrPromotion] - [movedPiece] - [to_sq(move)]; + depth += ((!PvNode) && (depth < 14)); + } - // Step 16. Make the move - pos.do_move(move, st, givesCheck); + // Multi-cut pruning + // Our ttMove is assumed to fail high based on the bound of the TT entry, + // and if after excluding the ttMove with a reduced search we fail high over the original beta, + // we assume this expected cut-node is not singular (multiple moves fail high), + // and we can prune the whole subtree by returning a softbound. + else if (singularBeta >= beta) + { + if (!ttCapture) + update_quiet_histories(pos, ss, *this, ttMove, -stat_malus(depth)); - bool doDeeperSearch = false; + return singularBeta; + } - // Step 17. Late moves reduction / extension (LMR, ~98 Elo) - // We use various heuristics for the sons of a node after the first son has - // been searched. In general we would like to reduce them, but there are many - // cases where we extend a son if it has good chances to be "interesting". - if ( depth >= 2 - && moveCount > 1 + (PvNode && ss->ply <= 1) - && ( !ss->ttPv - || !captureOrPromotion - || (cutNode && (ss-1)->moveCount > 1))) - { - Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta); + // Negative extensions + // If other moves failed high over (ttValue - margin) without the ttMove on a reduced search, + // but we cannot do multi-cut because (ttValue - margin) is lower than the original beta, + // we do not know if the ttMove is singular or can do a multi-cut, + // so we reduce the ttMove in favor of other moves based on some conditions: - // Decrease reduction at some PvNodes (~2 Elo) - if ( PvNode - && bestMoveCount <= 3) - r--; + // If the ttMove is assumed to fail high over current beta (~7 Elo) + else if (ttValue >= beta) + extension = -3; - // Decrease reduction if position is or has been on the PV - // and node is not likely to fail low. (~3 Elo) - if ( ss->ttPv - && !likelyFailLow) - r -= 2; + // If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo) + else if (cutNode) + extension = -2; - // Decrease reduction if opponent's move count is high (~1 Elo) - if ((ss-1)->moveCount > 7) - r--; + // If the ttMove is assumed to fail low over the value of the reduced search (~1 Elo) + else if (ttValue <= value) + extension = -1; + } - // Increase reduction for cut nodes (~3 Elo) - if (cutNode && move != ss->killers[0]) - r += 2; + // Extension for capturing the previous moved piece (~0 Elo on STC, ~1 Elo on LTC) + else if (PvNode && move == ttMove && move.to_sq() == prevSq + && thisThread->captureHistory[movedPiece][move.to_sq()] + [type_of(pos.piece_on(move.to_sq()))] + > 4016) + extension = 1; + } - // Increase reduction if ttMove is a capture (~3 Elo) - if (ttCapture) - r++; + // Add extension to new depth + newDepth += extension; - ss->statScore = thisThread->mainHistory[us][from_to(move)] - + (*contHist[0])[movedPiece][to_sq(move)] - + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)] - - 4334; + // Speculative prefetch as early as possible + prefetch(tt.first_entry(pos.key_after(move))); - // Decrease/increase reduction for moves with a good/bad history (~30 Elo) - r -= ss->statScore / 15914; + // Update the current move (this must be done after singular extension search) + ss->currentMove = move; + ss->continuationHistory = + &thisThread->continuationHistory[ss->inCheck][capture][movedPiece][move.to_sq()]; - // In general we want to cap the LMR depth search at newDepth. But if reductions - // are really negative and movecount is low, we allow this move to be searched - // deeper than the first move (this may lead to hidden double extensions). - int deeper = r >= -1 ? 0 - : moveCount <= 4 ? 2 - : PvNode && depth > 4 ? 1 - : cutNode && moveCount <= 8 ? 1 - : 0; + uint64_t nodeCount = rootNode ? uint64_t(nodes) : 0; - Depth d = std::clamp(newDepth - r, 1, newDepth + deeper); + // Step 16. Make the move + thisThread->nodes.fetch_add(1, std::memory_order_relaxed); + pos.do_move(move, st, givesCheck); - value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); + // Decrease reduction if position is or has been on the PV (~7 Elo) + if (ss->ttPv) + r -= 1 + (ttValue > alpha) + (tte->depth() >= depth); - // If the son is reduced and fails high it will be re-searched at full depth - doFullDepthSearch = value > alpha && d < newDepth; - doDeeperSearch = value > (alpha + 78 + 11 * (newDepth - d)); - didLMR = true; - } - else - { - doFullDepthSearch = !PvNode || moveCount > 1; - didLMR = false; - } + else if (cutNode && move != ttMove && move != ss->killers[0]) + r++; - // Step 18. Full depth search when LMR is skipped or fails high - if (doFullDepthSearch) - { - value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth + doDeeperSearch, !cutNode); + // Increase reduction for cut nodes (~4 Elo) + if (cutNode) + r += 2 - (tte->depth() >= depth && ss->ttPv); - // If the move passed LMR update its stats - if (didLMR) - { - int bonus = value > alpha ? stat_bonus(newDepth) - : -stat_bonus(newDepth); + // Increase reduction if ttMove is a capture (~3 Elo) + if (ttCapture) + r++; - if (captureOrPromotion) - bonus /= 6; + // Decrease reduction for PvNodes (~0 Elo on STC, ~2 Elo on LTC) + if (PvNode) + r--; - update_continuation_histories(ss, movedPiece, to_sq(move), bonus); - } - } + // Increase reduction if next ply has a lot of fail high (~5 Elo) + if ((ss + 1)->cutoffCnt > 3) + r++; - // For PV nodes only, do a full PV search on the first move or after a fail - // high (in the latter case search only if value < beta), otherwise let the - // parent node fail low with value <= alpha and try another move. - if (PvNode && (moveCount == 1 || (value > alpha && (rootNode || value < beta)))) - { - (ss+1)->pv = pv; - (ss+1)->pv[0] = MOVE_NONE; + // Set reduction to 0 for first picked move (ttMove) (~2 Elo) + // Nullifies all previous reduction adjustments to ttMove and leaves only history to do them + else if (move == ttMove) + r = 0; - value = -search(pos, ss+1, -beta, -alpha, - std::min(maxNextDepth, newDepth), false); - } + ss->statScore = 2 * thisThread->mainHistory[us][move.from_to()] + + (*contHist[0])[movedPiece][move.to_sq()] + + (*contHist[1])[movedPiece][move.to_sq()] + + (*contHist[3])[movedPiece][move.to_sq()] - 5078; - // Step 19. Undo move - pos.undo_move(move); + // Decrease/increase reduction for moves with a good/bad history (~8 Elo) + r -= ss->statScore / 12076; - assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); + // Step 17. Late moves reduction / extension (LMR, ~117 Elo) + if (depth >= 2 && moveCount > 1 + rootNode) + { + // In general we want to cap the LMR depth search at newDepth, but when + // reduction is negative, we allow this move a limited search extension + // beyond the first move depth. + // To prevent problems when the max value is less than the min value, + // std::clamp has been replaced by a more robust implementation. + Depth d = std::max(1, std::min(newDepth - r, newDepth + 1)); - // Step 20. Check for a new best move - // Finished searching the move. If a stop occurred, the return value of - // the search cannot be trusted, and we return immediately without - // updating best move, PV and TT. - if (Threads.stop.load(std::memory_order_relaxed)) - return VALUE_ZERO; + value = -search(pos, ss + 1, -(alpha + 1), -alpha, d, true); - if (rootNode) - { - RootMove& rm = *std::find(thisThread->rootMoves.begin(), - thisThread->rootMoves.end(), move); + // Do a full-depth search when reduced LMR search fails high + if (value > alpha && d < newDepth) + { + // Adjust full-depth search based on LMR results - if the result + // was good enough search deeper, if it was bad enough search shallower. + const bool doDeeperSearch = value > (bestValue + 40 + 2 * newDepth); // (~1 Elo) + const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo) - rm.averageScore = rm.averageScore != -VALUE_INFINITE ? (2 * value + rm.averageScore) / 3 : value; + newDepth += doDeeperSearch - doShallowerSearch; - // PV move or new best move? - if (moveCount == 1 || value > alpha) - { - rm.score = value; - rm.selDepth = thisThread->selDepth; - rm.pv.resize(1); + if (newDepth > d) + value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth, !cutNode); - assert((ss+1)->pv); + // Post LMR continuation history updates (~1 Elo) + int bonus = value <= alpha ? -stat_malus(newDepth) + : value >= beta ? stat_bonus(newDepth) + : 0; - for (Move* m = (ss+1)->pv; *m != MOVE_NONE; ++m) - rm.pv.push_back(*m); + update_continuation_histories(ss, movedPiece, move.to_sq(), bonus); + } + } - // We record how often the best move has been changed in each iteration. - // This information is used for time management. In MultiPV mode, - // we must take care to only do this for the first PV line. - if ( moveCount > 1 - && !thisThread->pvIdx) - ++thisThread->bestMoveChanges; - } - else - // All other moves but the PV are set to the lowest value: this - // is not a problem when sorting because the sort is stable and the - // move position in the list is preserved - just the PV is pushed up. - rm.score = -VALUE_INFINITE; - } + // Step 18. Full-depth search when LMR is skipped + else if (!PvNode || moveCount > 1) + { + // Increase reduction if ttMove is not present (~6 Elo) + if (!ttMove) + r += 2; - if (value > bestValue) - { - bestValue = value; + // Note that if expected reduction is high, we reduce search depth by 1 here (~9 Elo) + value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth - (r > 3), !cutNode); + } - if (value > alpha) - { - bestMove = move; + // For PV nodes only, do a full PV search on the first move or after a fail high, + // otherwise let the parent node fail low with value <= alpha and try another move. + if (PvNode && (moveCount == 1 || value > alpha)) + { + (ss + 1)->pv = pv; + (ss + 1)->pv[0] = Move::none(); - if (PvNode && !rootNode) // Update pv even in fail-high case - update_pv(ss->pv, move, (ss+1)->pv); + value = -search(pos, ss + 1, -beta, -alpha, newDepth, false); + } - if (PvNode && value < beta) // Update alpha! Always alpha < beta - { - alpha = value; - bestMoveCount++; - } - else - { - assert(value >= beta); // Fail high - break; - } - } - } + // Step 19. Undo move + pos.undo_move(move); - // If the move is worse than some previously searched move, remember it to update its stats later - if (move != bestMove) - { - if (captureOrPromotion && captureCount < 32) - capturesSearched[captureCount++] = move; + assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - else if (!captureOrPromotion && quietCount < 64) - quietsSearched[quietCount++] = move; - } + // Step 20. Check for a new best move + // Finished searching the move. If a stop occurred, the return value of + // the search cannot be trusted, and we return immediately without + // updating best move, PV and TT. + if (threads.stop.load(std::memory_order_relaxed)) + return VALUE_ZERO; + + if (rootNode) + { + RootMove& rm = + *std::find(thisThread->rootMoves.begin(), thisThread->rootMoves.end(), move); + + rm.effort += nodes - nodeCount; + + rm.averageScore = + rm.averageScore != -VALUE_INFINITE ? (2 * value + rm.averageScore) / 3 : value; + + // PV move or new best move? + if (moveCount == 1 || value > alpha) + { + rm.score = rm.uciScore = value; + rm.selDepth = thisThread->selDepth; + rm.scoreLowerbound = rm.scoreUpperbound = false; + + if (value >= beta) + { + rm.scoreLowerbound = true; + rm.uciScore = beta; + } + else if (value <= alpha) + { + rm.scoreUpperbound = true; + rm.uciScore = alpha; + } + + rm.pv.resize(1); + + assert((ss + 1)->pv); + + for (Move* m = (ss + 1)->pv; *m != Move::none(); ++m) + rm.pv.push_back(*m); + + // We record how often the best move has been changed in each iteration. + // This information is used for time management. In MultiPV mode, + // we must take care to only do this for the first PV line. + if (moveCount > 1 && !thisThread->pvIdx) + ++thisThread->bestMoveChanges; + } + else + // All other moves but the PV, are set to the lowest value: this + // is not a problem when sorting because the sort is stable and the + // move position in the list is preserved - just the PV is pushed up. + rm.score = -VALUE_INFINITE; + } + + if (value > bestValue) + { + bestValue = value; + + if (value > alpha) + { + bestMove = move; + + if (PvNode && !rootNode) // Update pv even in fail-high case + update_pv(ss->pv, move, (ss + 1)->pv); + + if (value >= beta) + { + ss->cutoffCnt += 1 + !ttMove; + assert(value >= beta); // Fail high + break; + } + else + { + // Reduce other moves if we have found at least one score improvement (~2 Elo) + if (depth > 2 && depth < 13 && beta < 15868 && value > -14630) + depth -= 2; + + assert(depth > 0); + alpha = value; // Update alpha! Always alpha < beta + } + } + } + + // If the move is worse than some previously searched move, + // remember it, to update its stats later. + if (move != bestMove && moveCount <= 32) + { + if (capture) + capturesSearched[captureCount++] = move; + else + quietsSearched[quietCount++] = move; + } } - // The following condition would detect a stop only after move loop has been - // completed. But in this case bestValue is valid because we have fully - // searched our subtree, and we can anyhow save the result in TT. - /* - if (Threads.stop) - return VALUE_DRAW; - */ - // Step 21. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then @@ -1324,58 +1318,69 @@ moves_loop: // When in check, search starts here assert(moveCount || !ss->inCheck || excludedMove || !MoveList(pos).size()); - if (!moveCount) - bestValue = excludedMove ? alpha : - ss->inCheck ? mated_in(ss->ply) - : VALUE_DRAW; + // Adjust best value for fail high cases at non-pv nodes + if (!PvNode && bestValue >= beta && std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY + && std::abs(beta) < VALUE_TB_WIN_IN_MAX_PLY && std::abs(alpha) < VALUE_TB_WIN_IN_MAX_PLY) + bestValue = (bestValue * depth + beta) / (depth + 1); - // If there is a move which produces search value greater than alpha we update stats of searched moves + if (!moveCount) + bestValue = excludedMove ? alpha : ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW; + + // If there is a move that produces search value greater than alpha we update the stats of searched moves else if (bestMove) - update_all_stats(pos, ss, bestMove, bestValue, beta, prevSq, - quietsSearched, quietCount, capturesSearched, captureCount, depth); + update_all_stats(pos, ss, *this, bestMove, bestValue, beta, prevSq, quietsSearched, + quietCount, capturesSearched, captureCount, depth); // Bonus for prior countermove that caused the fail low - else if ( (depth >= 4 || PvNode) - && !priorCapture) + else if (!priorCapture && prevSq != SQ_NONE) { - //Assign extra bonus if current node is PvNode or cutNode - //or fail low was really bad - bool extraBonus = PvNode - || cutNode - || bestValue < alpha - 70 * depth; - - update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * (1 + extraBonus)); + int bonus = (depth > 5) + (PvNode || cutNode) + ((ss - 1)->statScore < -14455) + + ((ss - 1)->moveCount > 10) + (!ss->inCheck && bestValue <= ss->staticEval - 130) + + (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 77); + update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, + stat_bonus(depth) * bonus); + thisThread->mainHistory[~us][((ss - 1)->currentMove).from_to()] + << stat_bonus(depth) * bonus / 2; } if (PvNode) bestValue = std::min(bestValue, maxValue); // If no good move is found and the previous position was ttPv, then the previous - // opponent move is probably good and the new position is added to the search tree. + // opponent move is probably good and the new position is added to the search tree. (~7 Elo) if (bestValue <= alpha) - ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); - // Otherwise, a counter move has been found and if the position is the last leaf - // in the search tree, remove the position from the search tree. - else if (depth > 3) - ss->ttPv = ss->ttPv && (ss+1)->ttPv; + ss->ttPv = ss->ttPv || ((ss - 1)->ttPv && depth > 3); // Write gathered information in transposition table + // Static evaluation is saved as it was before correction history if (!excludedMove && !(rootNode && thisThread->pvIdx)) tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, - bestValue >= beta ? BOUND_LOWER : - PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER, - depth, bestMove, ss->staticEval); + bestValue >= beta ? BOUND_LOWER + : PvNode && bestMove ? BOUND_EXACT + : BOUND_UPPER, + depth, bestMove, unadjustedStaticEval, tt.generation()); + + // Adjust correction history + if (!ss->inCheck && (!bestMove || !pos.capture(bestMove)) + && !(bestValue >= beta && bestValue <= ss->staticEval) + && !(!bestMove && bestValue >= ss->staticEval)) + { + auto bonus = std::clamp(int(bestValue - ss->staticEval) * depth / 8, + -CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4); + thisThread->correctionHistory[us][pawn_structure_index(pos)] << bonus; + } assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); return bestValue; - } +} - // qsearch() is the quiescence search function, which is called by the main search - // function with zero depth, or recursively with further decreasing depth per call. - template - Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { +// Quiescence search function, which is called by the main search +// function with zero depth, or recursively with further decreasing depth per call. +// (~155 Elo) +template +Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { static_assert(nodeType != Root); constexpr bool PvNode = nodeType == PV; @@ -1384,352 +1389,403 @@ moves_loop: // When in check, search starts here assert(PvNode || (alpha == beta - 1)); assert(depth <= 0); - Move pv[MAX_PLY+1]; + // Check if we have an upcoming move that draws by repetition, or if + // the opponent had an alternative move earlier to this position. (~1 Elo) + if (alpha < VALUE_DRAW && pos.has_game_cycle(ss->ply)) + { + alpha = value_draw(this->nodes); + if (alpha >= beta) + return alpha; + } + + Move pv[MAX_PLY + 1]; StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); TTEntry* tte; - Key posKey; - Move ttMove, move, bestMove; - Depth ttDepth; - Value bestValue, value, ttValue, futilityValue, futilityBase; - bool pvHit, givesCheck, captureOrPromotion; - int moveCount; + Key posKey; + Move ttMove, move, bestMove; + Depth ttDepth; + Value bestValue, value, ttValue, futilityBase; + bool pvHit, givesCheck, capture; + int moveCount; + Color us = pos.side_to_move(); + // Step 1. Initialize node if (PvNode) { - (ss+1)->pv = pv; - ss->pv[0] = MOVE_NONE; + (ss + 1)->pv = pv; + ss->pv[0] = Move::none(); } - Thread* thisThread = pos.this_thread(); - bestMove = MOVE_NONE; - ss->inCheck = pos.checkers(); - moveCount = 0; + Worker* thisThread = this; + bestMove = Move::none(); + ss->inCheck = pos.checkers(); + moveCount = 0; - // Check for an immediate draw or maximum ply reached - if ( pos.is_draw(ss->ply) - || ss->ply >= MAX_PLY) - return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : VALUE_DRAW; + // Used to send selDepth info to GUI (selDepth counts from 1, ply from 0) + if (PvNode && thisThread->selDepth < ss->ply + 1) + thisThread->selDepth = ss->ply + 1; + + // Step 2. Check for an immediate draw or maximum ply reached + if (pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) + return (ss->ply >= MAX_PLY && !ss->inCheck) + ? evaluate(networks, pos, refreshTable, thisThread->optimism[us]) + : VALUE_DRAW; assert(0 <= ss->ply && ss->ply < MAX_PLY); - // Decide whether or not to include checks: this fixes also the type of - // TT entry depth that we are going to use. Note that in qsearch we use - // only two types of depth in TT: DEPTH_QS_CHECKS or DEPTH_QS_NO_CHECKS. - ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS - : DEPTH_QS_NO_CHECKS; - // Transposition table lookup - posKey = pos.key(); - tte = TT.probe(posKey, ss->ttHit); - ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ss->ttHit ? tte->move() : MOVE_NONE; - pvHit = ss->ttHit && tte->is_pv(); + // Decide the replacement and cutoff priority of the qsearch TT entries + ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NO_CHECKS; - if ( !PvNode - && ss->ttHit - && tte->depth() >= ttDepth - && ttValue != VALUE_NONE // Only in case of TT access race - && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) - : (tte->bound() & BOUND_UPPER))) + // Step 3. Transposition table lookup + posKey = pos.key(); + tte = tt.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = ss->ttHit ? tte->move() : Move::none(); + pvHit = ss->ttHit && tte->is_pv(); + + // At non-PV nodes we check for an early TT cutoff + if (!PvNode && tte->depth() >= ttDepth + && ttValue != VALUE_NONE // Only in case of TT access race or if !ttHit + && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) return ttValue; - // Evaluate the position statically + // Step 4. Static evaluation of the position + Value unadjustedStaticEval = VALUE_NONE; if (ss->inCheck) - { - ss->staticEval = VALUE_NONE; bestValue = futilityBase = -VALUE_INFINITE; - } else { if (ss->ttHit) { // Never assume anything about values stored in TT - if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) - ss->staticEval = bestValue = evaluate(pos); + unadjustedStaticEval = tte->eval(); + if (unadjustedStaticEval == VALUE_NONE) + unadjustedStaticEval = + evaluate(networks, pos, refreshTable, thisThread->optimism[us]); + ss->staticEval = bestValue = + to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); - // ttValue can be used as a better position evaluation (~7 Elo) - if ( ttValue != VALUE_NONE + // ttValue can be used as a better position evaluation (~13 Elo) + if (ttValue != VALUE_NONE && (tte->bound() & (ttValue > bestValue ? BOUND_LOWER : BOUND_UPPER))) bestValue = ttValue; } else - // In case of null move search use previous static eval with a different sign - ss->staticEval = bestValue = - (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) - : -(ss-1)->staticEval; + { + // In case of null move search, use previous static eval with a different sign + unadjustedStaticEval = (ss - 1)->currentMove != Move::null() + ? evaluate(networks, pos, refreshTable, thisThread->optimism[us]) + : -(ss - 1)->staticEval; + ss->staticEval = bestValue = + to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos); + } // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) { - // Save gathered info in transposition table if (!ss->ttHit) - tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, - DEPTH_NONE, MOVE_NONE, ss->staticEval); + tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_NONE, + Move::none(), unadjustedStaticEval, tt.generation()); return bestValue; } - if (PvNode && bestValue > alpha) + if (bestValue > alpha) alpha = bestValue; - futilityBase = bestValue + 118; + futilityBase = ss->staticEval + 270; } - const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, - nullptr , (ss-4)->continuationHistory, - nullptr , (ss-6)->continuationHistory }; + const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, + (ss - 2)->continuationHistory}; // Initialize a MovePicker object for the current position, and prepare // to search the moves. Because the depth is <= 0 here, only captures, // queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS) // will be generated. - Square prevSq = to_sq((ss-1)->currentMove); - MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, - &thisThread->captureHistory, - contHist, - prevSq); + Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; + MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, + contHist, &thisThread->pawnHistory); int quietCheckEvasions = 0; - // Loop through the moves until no moves remain or a beta cutoff occurs - while ((move = mp.next_move()) != MOVE_NONE) + // Step 5. Loop through all pseudo-legal moves until no moves remain + // or a beta cutoff occurs. + while ((move = mp.next_move()) != Move::none()) { - assert(is_ok(move)); + assert(move.is_ok()); - // Check for legality - if (!pos.legal(move)) - continue; + // Check for legality + if (!pos.legal(move)) + continue; - givesCheck = pos.gives_check(move); - captureOrPromotion = pos.capture_or_promotion(move); + givesCheck = pos.gives_check(move); + capture = pos.capture_stage(move); - moveCount++; + moveCount++; - // Futility pruning and moveCount pruning (~5 Elo) - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && !givesCheck - && to_sq(move) != prevSq - && futilityBase > -VALUE_KNOWN_WIN - && type_of(move) != PROMOTION) - { + // Step 6. Pruning + if (bestValue > VALUE_TB_LOSS_IN_MAX_PLY && pos.non_pawn_material(us)) + { + // Futility pruning and moveCount pruning (~10 Elo) + if (!givesCheck && move.to_sq() != prevSq && futilityBase > VALUE_TB_LOSS_IN_MAX_PLY + && move.type_of() != PROMOTION) + { + if (moveCount > 2) + continue; - if (moveCount > 2) - continue; + Value futilityValue = futilityBase + PieceValue[pos.piece_on(move.to_sq())]; - futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; + // If static eval + value of piece we are going to capture is much lower + // than alpha we can prune this move. (~2 Elo) + if (futilityValue <= alpha) + { + bestValue = std::max(bestValue, futilityValue); + continue; + } - if (futilityValue <= alpha) - { - bestValue = std::max(bestValue, futilityValue); - continue; - } + // If static eval is much lower than alpha and move is not winning material + // we can prune this move. (~2 Elo) + if (futilityBase <= alpha && !pos.see_ge(move, 1)) + { + bestValue = std::max(bestValue, futilityBase); + continue; + } - if (futilityBase <= alpha && !pos.see_ge(move, VALUE_ZERO + 1)) - { - bestValue = std::max(bestValue, futilityBase); - continue; - } - } + // If static exchange evaluation is much worse than what is needed to not + // fall below alpha we can prune this move. + if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 4)) + { + bestValue = alpha; + continue; + } + } - // Do not search moves with negative SEE values (~5 Elo) - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && !pos.see_ge(move)) - continue; + // We prune after the second quiet check evasion move, where being 'in check' is + // implicitly checked through the counter, and being a 'quiet move' apart from + // being a tt move is assumed after an increment because captures are pushed ahead. + if (quietCheckEvasions > 1) + break; - // Speculative prefetch as early as possible - prefetch(TT.first_entry(pos.key_after(move))); + // Continuation history based pruning (~3 Elo) + if (!capture && (*contHist[0])[pos.moved_piece(move)][move.to_sq()] < 0 + && (*contHist[1])[pos.moved_piece(move)][move.to_sq()] < 0) + continue; - ss->currentMove = move; - ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [captureOrPromotion] - [pos.moved_piece(move)] - [to_sq(move)]; + // Do not search moves with bad enough SEE values (~5 Elo) + if (!pos.see_ge(move, -69)) + continue; + } - // Continuation history based pruning (~2 Elo) - if ( !captureOrPromotion - && bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold - && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold) - continue; + // Speculative prefetch as early as possible + prefetch(tt.first_entry(pos.key_after(move))); - // movecount pruning for quiet check evasions - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && quietCheckEvasions > 1 - && !captureOrPromotion - && ss->inCheck) - continue; + // Update the current move + ss->currentMove = move; + ss->continuationHistory = + &thisThread + ->continuationHistory[ss->inCheck][capture][pos.moved_piece(move)][move.to_sq()]; - quietCheckEvasions += !captureOrPromotion && ss->inCheck; + quietCheckEvasions += !capture && ss->inCheck; - // Make and search the move - pos.do_move(move, st, givesCheck); - value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); - pos.undo_move(move); + // Step 7. Make and search the move + thisThread->nodes.fetch_add(1, std::memory_order_relaxed); + pos.do_move(move, st, givesCheck); + value = -qsearch(pos, ss + 1, -beta, -alpha, depth - 1); + pos.undo_move(move); - assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); + assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - // Check for a new best move - if (value > bestValue) - { - bestValue = value; + // Step 8. Check for a new best move + if (value > bestValue) + { + bestValue = value; - if (value > alpha) - { - bestMove = move; + if (value > alpha) + { + bestMove = move; - if (PvNode) // Update pv even in fail-high case - update_pv(ss->pv, move, (ss+1)->pv); + if (PvNode) // Update pv even in fail-high case + update_pv(ss->pv, move, (ss + 1)->pv); - if (PvNode && value < beta) // Update alpha here! - alpha = value; - else - break; // Fail high - } - } + if (value < beta) // Update alpha here! + alpha = value; + else + break; // Fail high + } + } } + // Step 9. Check for mate // All legal moves have been searched. A special case: if we're in check // and no legal moves were found, it is checkmate. if (ss->inCheck && bestValue == -VALUE_INFINITE) { assert(!MoveList(pos).size()); - - return mated_in(ss->ply); // Plies to mate from the root + return mated_in(ss->ply); // Plies to mate from the root } + if (std::abs(bestValue) < VALUE_TB_WIN_IN_MAX_PLY && bestValue >= beta) + bestValue = (3 * bestValue + beta) / 4; + // Save gathered info in transposition table + // Static evaluation is saved as it was before adjustment by correction history tte->save(posKey, value_to_tt(bestValue, ss->ply), pvHit, - bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, - ttDepth, bestMove, ss->staticEval); + bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, ttDepth, bestMove, + unadjustedStaticEval, tt.generation()); assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); return bestValue; - } +} + +Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) { + int reductionScale = reductions[d] * reductions[mn]; + return (reductionScale + 1318 - delta * 760 / rootDelta) / 1024 + (!i && reductionScale > 1066); +} + +TimePoint Search::Worker::elapsed() const { + return main_manager()->tm.elapsed([this]() { return threads.nodes_searched(); }); +} - // value_to_tt() adjusts a mate or TB score from "plies to mate from the root" to - // "plies to mate from the current position". Standard scores are unchanged. - // The function is called before storing a value in the transposition table. - - Value value_to_tt(Value v, int ply) { +namespace { +// Adjusts a mate or TB score from "plies to mate from the root" +// to "plies to mate from the current position". Standard scores are unchanged. +// The function is called before storing a value in the transposition table. +Value value_to_tt(Value v, int ply) { assert(v != VALUE_NONE); - - return v >= VALUE_TB_WIN_IN_MAX_PLY ? v + ply - : v <= VALUE_TB_LOSS_IN_MAX_PLY ? v - ply : v; - } + return v >= VALUE_TB_WIN_IN_MAX_PLY ? v + ply : v <= VALUE_TB_LOSS_IN_MAX_PLY ? v - ply : v; +} - // value_from_tt() is the inverse of value_to_tt(): it adjusts a mate or TB score - // from the transposition table (which refers to the plies to mate/be mated from - // current position) to "plies to mate/be mated (TB win/loss) from the root". However, - // for mate scores, to avoid potentially false mate scores related to the 50 moves rule - // and the graph history interaction, we return an optimal TB score instead. - - Value value_from_tt(Value v, int ply, int r50c) { +// Inverse of value_to_tt(): it adjusts a mate or TB score +// from the transposition table (which refers to the plies to mate/be mated from +// current position) to "plies to mate/be mated (TB win/loss) from the root". +// However, to avoid potentially false mate or TB scores related to the 50 moves rule +// and the graph history interaction, we return the highest non-TB score instead. +Value value_from_tt(Value v, int ply, int r50c) { if (v == VALUE_NONE) return VALUE_NONE; - if (v >= VALUE_TB_WIN_IN_MAX_PLY) // TB win or better + // handle TB win or better + if (v >= VALUE_TB_WIN_IN_MAX_PLY) { - if (v >= VALUE_MATE_IN_MAX_PLY && VALUE_MATE - v > 99 - r50c) - return VALUE_MATE_IN_MAX_PLY - 1; // do not return a potentially false mate score + // Downgrade a potentially false mate score + if (v >= VALUE_MATE_IN_MAX_PLY && VALUE_MATE - v > 100 - r50c) + return VALUE_TB_WIN_IN_MAX_PLY - 1; + + // Downgrade a potentially false TB score. + if (VALUE_TB - v > 100 - r50c) + return VALUE_TB_WIN_IN_MAX_PLY - 1; return v - ply; } - if (v <= VALUE_TB_LOSS_IN_MAX_PLY) // TB loss or worse + // handle TB loss or worse + if (v <= VALUE_TB_LOSS_IN_MAX_PLY) { - if (v <= VALUE_MATED_IN_MAX_PLY && VALUE_MATE + v > 99 - r50c) - return VALUE_MATED_IN_MAX_PLY + 1; // do not return a potentially false mate score + // Downgrade a potentially false mate score. + if (v <= VALUE_MATED_IN_MAX_PLY && VALUE_MATE + v > 100 - r50c) + return VALUE_TB_LOSS_IN_MAX_PLY + 1; + + // Downgrade a potentially false TB score. + if (VALUE_TB + v > 100 - r50c) + return VALUE_TB_LOSS_IN_MAX_PLY + 1; return v + ply; } return v; - } +} - // update_pv() adds current move and appends child pv[] +// Adds current move and appends child pv[] +void update_pv(Move* pv, Move move, const Move* childPv) { - void update_pv(Move* pv, Move move, Move* childPv) { - - for (*pv++ = move; childPv && *childPv != MOVE_NONE; ) + for (*pv++ = move; childPv && *childPv != Move::none();) *pv++ = *childPv++; - *pv = MOVE_NONE; - } + *pv = Move::none(); +} - // update_all_stats() updates stats at the end of search() when a bestMove is found +// Updates stats at the end of search() when a bestMove is found +void update_all_stats(const Position& pos, + Stack* ss, + Search::Worker& workerThread, + Move bestMove, + Value bestValue, + Value beta, + Square prevSq, + Move* quietsSearched, + int quietCount, + Move* capturesSearched, + int captureCount, + Depth depth) { - void update_all_stats(const Position& pos, Stack* ss, Move bestMove, Value bestValue, Value beta, Square prevSq, - Move* quietsSearched, int quietCount, Move* capturesSearched, int captureCount, Depth depth) { + CapturePieceToHistory& captureHistory = workerThread.captureHistory; + Piece moved_piece = pos.moved_piece(bestMove); + PieceType captured; - int bonus1, bonus2; - Color us = pos.side_to_move(); - Thread* thisThread = pos.this_thread(); - CapturePieceToHistory& captureHistory = thisThread->captureHistory; - Piece moved_piece = pos.moved_piece(bestMove); - PieceType captured = type_of(pos.piece_on(to_sq(bestMove))); + int quietMoveBonus = stat_bonus(depth + 1); + int quietMoveMalus = stat_malus(depth); - bonus1 = stat_bonus(depth + 1); - bonus2 = bestValue > beta + PawnValueMg ? bonus1 // larger bonus - : stat_bonus(depth); // smaller bonus - - if (!pos.capture_or_promotion(bestMove)) + if (!pos.capture_stage(bestMove)) { - // Increase stats for the best move in case it was a quiet move - update_quiet_stats(pos, ss, bestMove, bonus2); + int bestMoveBonus = bestValue > beta + 165 ? quietMoveBonus // larger bonus + : stat_bonus(depth); // smaller bonus + + update_quiet_stats(pos, ss, workerThread, bestMove, bestMoveBonus); // Decrease stats for all non-best quiet moves for (int i = 0; i < quietCount; ++i) - { - thisThread->mainHistory[us][from_to(quietsSearched[i])] << -bonus2; - update_continuation_histories(ss, pos.moved_piece(quietsSearched[i]), to_sq(quietsSearched[i]), -bonus2); - } + update_quiet_histories(pos, ss, workerThread, quietsSearched[i], -quietMoveMalus); } else + { // Increase stats for the best move in case it was a capture move - captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1; + captured = type_of(pos.piece_on(bestMove.to_sq())); + captureHistory[moved_piece][bestMove.to_sq()][captured] << quietMoveBonus; + } // Extra penalty for a quiet early move that was not a TT move or // main killer move in previous ply when it gets refuted. - if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) + if (prevSq != SQ_NONE + && ((ss - 1)->moveCount == 1 + (ss - 1)->ttHit + || ((ss - 1)->currentMove == (ss - 1)->killers[0])) && !pos.captured_piece()) - update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1); + update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -quietMoveMalus); // Decrease stats for all non-best capture moves for (int i = 0; i < captureCount; ++i) { moved_piece = pos.moved_piece(capturesSearched[i]); - captured = type_of(pos.piece_on(to_sq(capturesSearched[i]))); - captureHistory[moved_piece][to_sq(capturesSearched[i])][captured] << -bonus1; + captured = type_of(pos.piece_on(capturesSearched[i].to_sq())); + captureHistory[moved_piece][capturesSearched[i].to_sq()][captured] << -quietMoveMalus; } - } +} - // update_continuation_histories() updates histories of the move pairs formed - // by moves at ply -1, -2, -4, and -6 with current move. +// Updates histories of the move pairs formed +// by moves at ply -1, -2, -3, -4, and -6 with current move. +void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - - for (int i : {1, 2, 4, 6}) + for (int i : {1, 2, 3, 4, 6}) { - // Only update first 2 continuation histories if we are in check + // Only update the first 2 continuation histories if we are in check if (ss->inCheck && i > 2) break; - if (is_ok((ss-i)->currentMove)) - (*(ss-i)->continuationHistory)[pc][to] << bonus; + if (((ss - i)->currentMove).is_ok()) + (*(ss - i)->continuationHistory)[pc][to] << bonus / (1 + 3 * (i == 3)); } - } +} - - // update_quiet_stats() updates move sorting heuristics - - void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus) { +// Updates move sorting heuristics +void update_refutations(const Position& pos, Stack* ss, Search::Worker& workerThread, Move move) { // Update killers if (ss->killers[0] != move) @@ -1738,31 +1794,45 @@ moves_loop: // When in check, search starts here ss->killers[0] = move; } - Color us = pos.side_to_move(); - Thread* thisThread = pos.this_thread(); - thisThread->mainHistory[us][from_to(move)] << bonus; - update_continuation_histories(ss, pos.moved_piece(move), to_sq(move), bonus); - // Update countermove history - if (is_ok((ss-1)->currentMove)) + if (((ss - 1)->currentMove).is_ok()) { - Square prevSq = to_sq((ss-1)->currentMove); - thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] = move; + Square prevSq = ((ss - 1)->currentMove).to_sq(); + workerThread.counterMoves[pos.piece_on(prevSq)][prevSq] = move; } - } +} - // When playing with strength handicap, choose best move among a set of RootMoves - // using a statistical rule dependent on 'level'. Idea by Heinz van Saanen. +void update_quiet_histories( + const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { - Move Skill::pick_best(size_t multiPV) { + Color us = pos.side_to_move(); + workerThread.mainHistory[us][move.from_to()] << bonus; - const RootMoves& rootMoves = Threads.main()->rootMoves; - static PRNG rng(now()); // PRNG sequence should be non-deterministic + update_continuation_histories(ss, pos.moved_piece(move), move.to_sq(), bonus); + + int pIndex = pawn_structure_index(pos); + workerThread.pawnHistory[pIndex][pos.moved_piece(move)][move.to_sq()] << bonus; +} + +// Updates move sorting heuristics +void update_quiet_stats( + const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { + + update_refutations(pos, ss, workerThread, move); + update_quiet_histories(pos, ss, workerThread, move, bonus); +} + +} + +// When playing with strength handicap, choose the best move among a set of RootMoves +// using a statistical rule dependent on 'level'. Idea by Heinz van Saanen. +Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) { + static PRNG rng(now()); // PRNG sequence should be non-deterministic // RootMoves are already sorted by score in descending order - Value topScore = rootMoves[0].score; - int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValueMg); - int maxScore = -VALUE_INFINITE; + Value topScore = rootMoves[0].score; + int delta = std::min(topScore - rootMoves[multiPV - 1].score, int(PawnValue)); + int maxScore = -VALUE_INFINITE; double weakness = 120 - 2 * level; // Choose best move. For each move score we add two terms, both dependent on @@ -1771,123 +1841,124 @@ moves_loop: // When in check, search starts here for (size_t i = 0; i < multiPV; ++i) { // This is our magic formula - int push = int(( weakness * int(topScore - rootMoves[i].score) - + delta * (rng.rand() % int(weakness))) / 128); + int push = (weakness * int(topScore - rootMoves[i].score) + + delta * (rng.rand() % int(weakness))) + / 128; if (rootMoves[i].score + push >= maxScore) { maxScore = rootMoves[i].score + push; - best = rootMoves[i].pv[0]; + best = rootMoves[i].pv[0]; } } return best; - } - -} // namespace - - -/// MainThread::check_time() is used to print debug info and, more importantly, -/// to detect when we are out of available time and thus stop the search. - -void MainThread::check_time() { - - if (--callsCnt > 0) - return; - - // When using nodes, ensure checking rate is not lower than 0.1% of nodes - callsCnt = Limits.nodes ? std::min(1024, int(Limits.nodes / 1024)) : 1024; - - static TimePoint lastInfoTime = now(); - - TimePoint elapsed = Time.elapsed(); - TimePoint tick = Limits.startTime + elapsed; - - if (tick - lastInfoTime >= 1000) - { - lastInfoTime = tick; - dbg_print(); - } - - // We should not stop pondering until told so by the GUI - if (ponder) - return; - - if ( (Limits.use_time_management() && (elapsed > Time.maximum() - 10 || stopOnPonderhit)) - || (Limits.movetime && elapsed >= Limits.movetime) - || (Limits.nodes && Threads.nodes_searched() >= (uint64_t)Limits.nodes)) - Threads.stop = true; } -/// UCI::pv() formats PV information according to the UCI protocol. UCI requires -/// that all (if any) unsearched PV lines are sent using a previous search score. +// Used to print debug info and, more importantly, +// to detect when we are out of available time and thus stop the search. +void SearchManager::check_time(Search::Worker& worker) { + if (--callsCnt > 0) + return; -string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { + // When using nodes, ensure checking rate is not lower than 0.1% of nodes + callsCnt = worker.limits.nodes ? std::min(512, int(worker.limits.nodes / 1024)) : 512; - std::stringstream ss; - TimePoint elapsed = Time.elapsed() + 1; - const RootMoves& rootMoves = pos.this_thread()->rootMoves; - size_t pvIdx = pos.this_thread()->pvIdx; - size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size()); - uint64_t nodesSearched = Threads.nodes_searched(); - uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0); + static TimePoint lastInfoTime = now(); - for (size_t i = 0; i < multiPV; ++i) - { - bool updated = rootMoves[i].score != -VALUE_INFINITE; + TimePoint elapsed = tm.elapsed([&worker]() { return worker.threads.nodes_searched(); }); + TimePoint tick = worker.limits.startTime + elapsed; - if (depth == 1 && !updated && i > 0) - continue; + if (tick - lastInfoTime >= 1000) + { + lastInfoTime = tick; + dbg_print(); + } - Depth d = updated ? depth : std::max(1, depth - 1); - Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore; + // We should not stop pondering until told so by the GUI + if (ponder) + return; - if (v == -VALUE_INFINITE) - v = VALUE_ZERO; - - bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; - v = tb ? rootMoves[i].tbScore : v; - - if (ss.rdbuf()->in_avail()) // Not at first line - ss << "\n"; - - ss << "info" - << " depth " << d - << " seldepth " << rootMoves[i].selDepth - << " multipv " << i + 1 - << " score " << UCI::value(v); - - if (Options["UCI_ShowWDL"]) - ss << UCI::wdl(v, pos.game_ply()); - - if (!tb && i == pvIdx) - ss << (v >= beta ? " lowerbound" : v <= alpha ? " upperbound" : ""); - - ss << " nodes " << nodesSearched - << " nps " << nodesSearched * 1000 / elapsed; - - if (elapsed > 1000) // Earlier makes little sense - ss << " hashfull " << TT.hashfull(); - - ss << " tbhits " << tbHits - << " time " << elapsed - << " pv"; - - for (Move m : rootMoves[i].pv) - ss << " " << UCI::move(m, pos.is_chess960()); - } - - return ss.str(); + if ( + // Later we rely on the fact that we can at least use the mainthread previous + // root-search score and PV in a multithreaded environment to prove mated-in scores. + worker.completedDepth >= 1 + && ((worker.limits.use_time_management() && (elapsed > tm.maximum() || stopOnPonderhit)) + || (worker.limits.movetime && elapsed >= worker.limits.movetime) + || (worker.limits.nodes && worker.threads.nodes_searched() >= worker.limits.nodes))) + worker.threads.stop = worker.threads.abortedSearch = true; } +void SearchManager::pv(const Search::Worker& worker, + const ThreadPool& threads, + const TranspositionTable& tt, + Depth depth) const { -/// RootMove::extract_ponder_from_tt() is called in case we have no ponder move -/// before exiting the search, for instance, in case we stop the search during a -/// fail high at root. We try hard to have a ponder move to return to the GUI, -/// otherwise in case of 'ponder on' we have nothing to think on. + const auto nodes = threads.nodes_searched(); + const auto& rootMoves = worker.rootMoves; + const auto& pos = worker.rootPos; + size_t pvIdx = worker.pvIdx; + TimePoint time = tm.elapsed([nodes]() { return nodes; }) + 1; + size_t multiPV = std::min(size_t(worker.options["MultiPV"]), rootMoves.size()); + uint64_t tbHits = threads.tb_hits() + (worker.tbConfig.rootInTB ? rootMoves.size() : 0); -bool RootMove::extract_ponder_from_tt(Position& pos) { + for (size_t i = 0; i < multiPV; ++i) + { + bool updated = rootMoves[i].score != -VALUE_INFINITE; + + if (depth == 1 && !updated && i > 0) + continue; + + Depth d = updated ? depth : std::max(1, depth - 1); + Value v = updated ? rootMoves[i].uciScore : rootMoves[i].previousScore; + + if (v == -VALUE_INFINITE) + v = VALUE_ZERO; + + bool tb = worker.tbConfig.rootInTB && std::abs(v) <= VALUE_TB; + v = tb ? rootMoves[i].tbScore : v; + + std::string pv; + for (Move m : rootMoves[i].pv) + pv += UCIEngine::move(m, pos.is_chess960()) + " "; + + // remove last whitespace + if (!pv.empty()) + pv.pop_back(); + + auto wdl = worker.options["UCI_ShowWDL"] ? UCIEngine::wdl(v, pos) : ""; + auto bound = rootMoves[i].scoreLowerbound + ? "lowerbound" + : (rootMoves[i].scoreUpperbound ? "upperbound" : ""); + + InfoFull info; + + info.depth = d; + info.selDepth = rootMoves[i].selDepth; + info.multiPV = i + 1; + info.score = {v, pos}; + info.wdl = wdl; + + if (i == pvIdx && !tb && updated) // tablebase- and previous-scores are exact + info.bound = bound; + + info.timeMs = time; + info.nodes = nodes; + info.nps = nodes * 1000 / time; + info.tbHits = tbHits; + info.pv = pv; + info.hashfull = tt.hashfull(); + + updates.onUpdateFull(info); + } +} + +// Called in case we have no ponder move before exiting the search, +// for instance, in case we stop the search during a fail high at root. +// We try hard to have a ponder move to return to the GUI, +// otherwise in case of 'ponder on' we have nothing to think about. +bool RootMove::extract_ponder_from_tt(const TranspositionTable& tt, Position& pos) { StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); @@ -1895,16 +1966,15 @@ bool RootMove::extract_ponder_from_tt(Position& pos) { bool ttHit; assert(pv.size() == 1); - - if (pv[0] == MOVE_NONE) + if (pv[0] == Move::none()) return false; pos.do_move(pv[0], st); - TTEntry* tte = TT.probe(pos.key(), ttHit); + TTEntry* tte = tt.probe(pos.key(), ttHit); if (ttHit) { - Move m = tte->move(); // Local copy to be SMP safe + Move m = tte->move(); // Local copy to be SMP safe if (MoveList(pos).contains(m)) pv.push_back(m); } @@ -1913,51 +1983,5 @@ bool RootMove::extract_ponder_from_tt(Position& pos) { return pv.size() > 1; } -void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { - RootInTB = false; - UseRule50 = bool(Options["Syzygy50MoveRule"]); - ProbeDepth = int(Options["SyzygyProbeDepth"]); - Cardinality = int(Options["SyzygyProbeLimit"]); - bool dtz_available = true; - - // Tables with fewer pieces than SyzygyProbeLimit are searched with - // ProbeDepth == DEPTH_ZERO - if (Cardinality > MaxCardinality) - { - Cardinality = MaxCardinality; - ProbeDepth = 0; - } - - if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) - { - // Rank moves using DTZ tables - RootInTB = root_probe(pos, rootMoves); - - if (!RootInTB) - { - // DTZ tables are missing; try to rank moves using WDL tables - dtz_available = false; - RootInTB = root_probe_wdl(pos, rootMoves); - } - } - - if (RootInTB) - { - // Sort moves according to TB rank - std::stable_sort(rootMoves.begin(), rootMoves.end(), - [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } ); - - // Probe during search only if DTZ is not available and we are winning - if (dtz_available || rootMoves[0].tbScore <= VALUE_DRAW) - Cardinality = 0; - } - else - { - // Clean up if root_probe() and root_probe_wdl() have failed - for (auto& m : rootMoves) - m.tbRank = 0; - } -} - -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/search.h b/src/search.h index 806295a1..cb73a5af 100644 --- a/src/search.h +++ b/src/search.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,98 +19,303 @@ #ifndef SEARCH_H_INCLUDED #define SEARCH_H_INCLUDED +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include "misc.h" #include "movepick.h" +#include "position.h" +#include "score.h" +#include "syzygy/tbprobe.h" +#include "timeman.h" #include "types.h" +#include "nnue/nnue_accumulator.h" namespace Stockfish { -class Position; +namespace Eval::NNUE { +struct Networks; +} + +// Different node types, used as a template parameter +enum NodeType { + NonPV, + PV, + Root +}; + +class TranspositionTable; +class ThreadPool; +class OptionsMap; namespace Search { -/// Threshold used for countermoves based pruning -constexpr int CounterMovePruneThreshold = 0; - - -/// Stack struct keeps track of the information we need to remember from nodes -/// shallower and deeper in the tree during the search. Each search thread has -/// its own array of Stack objects, indexed by the current ply. - +// Stack struct keeps track of the information we need to remember from nodes +// shallower and deeper in the tree during the search. Each search thread has +// its own array of Stack objects, indexed by the current ply. struct Stack { - Move* pv; - PieceToHistory* continuationHistory; - int ply; - Move currentMove; - Move excludedMove; - Move killers[2]; - Value staticEval; - Depth depth; - int statScore; - int moveCount; - bool inCheck; - bool ttPv; - bool ttHit; - int doubleExtensions; + Move* pv; + PieceToHistory* continuationHistory; + int ply; + Move currentMove; + Move excludedMove; + Move killers[2]; + Value staticEval; + int statScore; + int moveCount; + bool inCheck; + bool ttPv; + bool ttHit; + int cutoffCnt; }; -/// RootMove struct is used for moves at the root of the tree. For each root move -/// we store a score and a PV (really a refutation in the case of moves which -/// fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves. - +// RootMove struct is used for moves at the root of the tree. For each root move +// we store a score and a PV (really a refutation in the case of moves which +// fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves. struct RootMove { - explicit RootMove(Move m) : pv(1, m) {} - bool extract_ponder_from_tt(Position& pos); - bool operator==(const Move& m) const { return pv[0] == m; } - bool operator<(const RootMove& m) const { // Sort in descending order - return m.score != score ? m.score < score - : m.previousScore < previousScore; - } + explicit RootMove(Move m) : + pv(1, m) {} + bool extract_ponder_from_tt(const TranspositionTable& tt, Position& pos); + bool operator==(const Move& m) const { return pv[0] == m; } + // Sort in descending order + bool operator<(const RootMove& m) const { + return m.score != score ? m.score < score : m.previousScore < previousScore; + } - Value score = -VALUE_INFINITE; - Value previousScore = -VALUE_INFINITE; - Value averageScore = -VALUE_INFINITE; - int selDepth = 0; - int tbRank = 0; - Value tbScore; - std::vector pv; + uint64_t effort = 0; + Value score = -VALUE_INFINITE; + Value previousScore = -VALUE_INFINITE; + Value averageScore = -VALUE_INFINITE; + Value uciScore = -VALUE_INFINITE; + bool scoreLowerbound = false; + bool scoreUpperbound = false; + int selDepth = 0; + int tbRank = 0; + Value tbScore; + std::vector pv; }; -typedef std::vector RootMoves; +using RootMoves = std::vector; -/// LimitsType struct stores information sent by GUI about available time to -/// search the current move, maximum depth/time, or if we are in analysis mode. - +// LimitsType struct stores information sent by the caller about the analysis required. struct LimitsType { - LimitsType() { // Init explicitly due to broken value-initialization of non POD in MSVC - time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0); - movestogo = depth = mate = perft = infinite = 0; - nodes = 0; - } + // Init explicitly due to broken value-initialization of non POD in MSVC + LimitsType() { + time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0); + movestogo = depth = mate = perft = infinite = 0; + nodes = 0; + ponderMode = false; + } - bool use_time_management() const { - return time[WHITE] || time[BLACK]; - } + bool use_time_management() const { return time[WHITE] || time[BLACK]; } - std::vector searchmoves; - TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime; - int movestogo, depth, mate, perft, infinite; - int64_t nodes; + std::vector searchmoves; + TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime; + int movestogo, depth, mate, perft, infinite; + uint64_t nodes; + bool ponderMode; + Square capSq; }; -extern LimitsType Limits; -void init(); -void clear(); +// The UCI stores the uci options, thread pool, and transposition table. +// This struct is used to easily forward data to the Search::Worker class. +struct SharedState { + SharedState(const OptionsMap& optionsMap, + ThreadPool& threadPool, + TranspositionTable& transpositionTable, + const Eval::NNUE::Networks& nets) : + options(optionsMap), + threads(threadPool), + tt(transpositionTable), + networks(nets) {} -} // namespace Search + const OptionsMap& options; + ThreadPool& threads; + TranspositionTable& tt; + const Eval::NNUE::Networks& networks; +}; -} // namespace Stockfish +class Worker; -#endif // #ifndef SEARCH_H_INCLUDED +// Null Object Pattern, implement a common interface for the SearchManagers. +// A Null Object will be given to non-mainthread workers. +class ISearchManager { + public: + virtual ~ISearchManager() {} + virtual void check_time(Search::Worker&) = 0; +}; + +struct InfoShort { + int depth; + Score score; +}; + +struct InfoFull: InfoShort { + int selDepth; + size_t multiPV; + std::string_view wdl; + std::string_view bound; + size_t timeMs; + size_t nodes; + size_t nps; + size_t tbHits; + std::string_view pv; + int hashfull; +}; + +struct InfoIteration { + int depth; + std::string_view currmove; + size_t currmovenumber; +}; + +// SearchManager manages the search from the main thread. It is responsible for +// keeping track of the time, and storing data strictly related to the main thread. +class SearchManager: public ISearchManager { + public: + using UpdateShort = std::function; + using UpdateFull = std::function; + using UpdateIter = std::function; + using UpdateBestmove = std::function; + + struct UpdateContext { + UpdateShort onUpdateNoMoves; + UpdateFull onUpdateFull; + UpdateIter onIter; + UpdateBestmove onBestmove; + }; + + + SearchManager(const UpdateContext& updateContext) : + updates(updateContext) {} + + void check_time(Search::Worker& worker) override; + + void pv(const Search::Worker& worker, + const ThreadPool& threads, + const TranspositionTable& tt, + Depth depth) const; + + Stockfish::TimeManagement tm; + int callsCnt; + std::atomic_bool ponder; + + std::array iterValue; + double previousTimeReduction; + Value bestPreviousScore; + Value bestPreviousAverageScore; + bool stopOnPonderhit; + + size_t id; + + const UpdateContext& updates; +}; + +class NullSearchManager: public ISearchManager { + public: + void check_time(Search::Worker&) override {} +}; + + +// Search::Worker is the class that does the actual search. +// It is instantiated once per thread, and it is responsible for keeping track +// of the search history, and storing data required for the search. +class Worker { + public: + Worker(SharedState&, std::unique_ptr, size_t); + + // Called at instantiation to initialize Reductions tables + // Reset histories, usually before a new game + void clear(); + + // Called when the program receives the UCI 'go' command. + // It searches from the root position and outputs the "bestmove". + void start_searching(); + + bool is_mainthread() const { return thread_idx == 0; } + + // Public because they need to be updatable by the stats + CounterMoveHistory counterMoves; + ButterflyHistory mainHistory; + CapturePieceToHistory captureHistory; + ContinuationHistory continuationHistory[2][2]; + PawnHistory pawnHistory; + CorrectionHistory correctionHistory; + + private: + void iterative_deepening(); + + // Main search function for both PV and non-PV nodes + template + Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); + + // Quiescence search function, which is called by the main search + template + Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0); + + Depth reduction(bool i, Depth d, int mn, int delta); + + // Get a pointer to the search manager, only allowed to be called by the + // main thread. + SearchManager* main_manager() const { + assert(thread_idx == 0); + return static_cast(manager.get()); + } + + TimePoint elapsed() const; + + LimitsType limits; + + size_t pvIdx, pvLast; + std::atomic nodes, tbHits, bestMoveChanges; + int selDepth, nmpMinPly; + + Value optimism[COLOR_NB]; + + Position rootPos; + StateInfo rootState; + RootMoves rootMoves; + Depth rootDepth, completedDepth; + Value rootDelta; + + size_t thread_idx; + + // Reductions lookup table initialized at startup + std::array reductions; // [depth or moveNumber] + + // The main thread has a SearchManager, the others have a NullSearchManager + std::unique_ptr manager; + + Tablebases::Config tbConfig; + + const OptionsMap& options; + ThreadPool& threads; + TranspositionTable& tt; + const Eval::NNUE::Networks& networks; + + // Used by NNUE + Eval::NNUE::AccumulatorCaches refreshTable; + + friend class Stockfish::ThreadPool; + friend class SearchManager; +}; + + +} // namespace Search + +} // namespace Stockfish + +#endif // #ifndef SEARCH_H_INCLUDED diff --git a/src/simd.h b/src/simd.h deleted file mode 100644 index 7b9e8fb2..00000000 --- a/src/simd.h +++ /dev/null @@ -1,387 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef STOCKFISH_SIMD_H_INCLUDED -#define STOCKFISH_SIMD_H_INCLUDED - -#if defined(USE_AVX2) -# include - -#elif defined(USE_SSE41) -# include - -#elif defined(USE_SSSE3) -# include - -#elif defined(USE_SSE2) -# include - -#elif defined(USE_MMX) -# include - -#elif defined(USE_NEON) -# include -#endif - -// The inline asm is only safe for GCC, where it is necessary to get good codegen. -// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101693 -// Clang does fine without it. -// Play around here: https://godbolt.org/z/7EWqrYq51 -#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)) -#define USE_INLINE_ASM -#endif - -// Use either the AVX512 or AVX-VNNI version of the VNNI instructions. -#if defined(USE_AVXVNNI) -#define VNNI_PREFIX "%{vex%} " -#else -#define VNNI_PREFIX "" -#endif - -namespace Stockfish::Simd { - -#if defined (USE_AVX512) - - [[maybe_unused]] static int m512_hadd(__m512i sum, int bias) { - return _mm512_reduce_add_epi32(sum) + bias; - } - - /* - Parameters: - sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]] - sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]] - sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]] - sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]] - - Returns: - ret = [ - reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]), - reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]), - reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), reduce_add_epi32(zmm3.i128[2]), - reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3]) - ] - */ - [[maybe_unused]] static __m512i m512_hadd128x16_interleave( - __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) { - - __m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1); - __m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1); - - __m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3); - __m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3); - - __m512i sum01 = _mm512_add_epi32(sum01a, sum01b); - __m512i sum23 = _mm512_add_epi32(sum23a, sum23b); - - __m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23); - __m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23); - - return _mm512_add_epi32(sum0123a, sum0123b); - } - - [[maybe_unused]] static __m128i m512_haddx4( - __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, - __m128i bias) { - - __m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3); - - __m256i sum256lo = _mm512_castsi512_si256(sum); - __m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1); - - sum256lo = _mm256_add_epi32(sum256lo, sum256hi); - - __m128i sum128lo = _mm256_castsi256_si128(sum256lo); - __m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1); - - return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); - } - - [[maybe_unused]] static void m512_add_dpbusd_epi32( - __m512i& acc, - __m512i a, - __m512i b) { - -# if defined (USE_VNNI) -# if defined (USE_INLINE_ASM) - asm( - "vpdpbusd %[b], %[a], %[acc]\n\t" - : [acc]"+v"(acc) - : [a]"v"(a), [b]"vm"(b) - ); -# else - acc = _mm512_dpbusd_epi32(acc, a, b); -# endif -# else -# if defined (USE_INLINE_ASM) - __m512i tmp = _mm512_maddubs_epi16(a, b); - asm( - "vpmaddwd %[tmp], %[ones], %[tmp]\n\t" - "vpaddd %[acc], %[tmp], %[acc]\n\t" - : [acc]"+v"(acc), [tmp]"+&v"(tmp) - : [ones]"v"(_mm512_set1_epi16(1)) - ); -# else - __m512i product0 = _mm512_maddubs_epi16(a, b); - product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); - acc = _mm512_add_epi32(acc, product0); -# endif -# endif - } - - [[maybe_unused]] static void m512_add_dpbusd_epi32x2( - __m512i& acc, - __m512i a0, __m512i b0, - __m512i a1, __m512i b1) { - -# if defined (USE_VNNI) -# if defined (USE_INLINE_ASM) - asm( - "vpdpbusd %[b0], %[a0], %[acc]\n\t" - "vpdpbusd %[b1], %[a1], %[acc]\n\t" - : [acc]"+v"(acc) - : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) - ); -# else - acc = _mm512_dpbusd_epi32(acc, a0, b0); - acc = _mm512_dpbusd_epi32(acc, a1, b1); -# endif -# else -# if defined (USE_INLINE_ASM) - __m512i tmp0 = _mm512_maddubs_epi16(a0, b0); - __m512i tmp1 = _mm512_maddubs_epi16(a1, b1); - asm( - "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" - "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" - "vpaddd %[acc], %[tmp0], %[acc]\n\t" - : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) - : [tmp1]"v"(tmp1), [ones]"v"(_mm512_set1_epi16(1)) - ); -# else - __m512i product0 = _mm512_maddubs_epi16(a0, b0); - __m512i product1 = _mm512_maddubs_epi16(a1, b1); - product0 = _mm512_adds_epi16(product0, product1); - product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); - acc = _mm512_add_epi32(acc, product0); -# endif -# endif - } - -#endif - -#if defined (USE_AVX2) - - [[maybe_unused]] static int m256_hadd(__m256i sum, int bias) { - __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); - sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); - sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); - return _mm_cvtsi128_si32(sum128) + bias; - } - - [[maybe_unused]] static __m128i m256_haddx4( - __m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3, - __m128i bias) { - - sum0 = _mm256_hadd_epi32(sum0, sum1); - sum2 = _mm256_hadd_epi32(sum2, sum3); - - sum0 = _mm256_hadd_epi32(sum0, sum2); - - __m128i sum128lo = _mm256_castsi256_si128(sum0); - __m128i sum128hi = _mm256_extracti128_si256(sum0, 1); - - return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); - } - - [[maybe_unused]] static void m256_add_dpbusd_epi32( - __m256i& acc, - __m256i a, - __m256i b) { - -# if defined (USE_VNNI) -# if defined (USE_INLINE_ASM) - asm( - VNNI_PREFIX "vpdpbusd %[b], %[a], %[acc]\n\t" - : [acc]"+v"(acc) - : [a]"v"(a), [b]"vm"(b) - ); -# else - acc = _mm256_dpbusd_epi32(acc, a, b); -# endif -# else -# if defined (USE_INLINE_ASM) - __m256i tmp = _mm256_maddubs_epi16(a, b); - asm( - "vpmaddwd %[tmp], %[ones], %[tmp]\n\t" - "vpaddd %[acc], %[tmp], %[acc]\n\t" - : [acc]"+v"(acc), [tmp]"+&v"(tmp) - : [ones]"v"(_mm256_set1_epi16(1)) - ); -# else - __m256i product0 = _mm256_maddubs_epi16(a, b); - product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); - acc = _mm256_add_epi32(acc, product0); -# endif -# endif - } - - [[maybe_unused]] static void m256_add_dpbusd_epi32x2( - __m256i& acc, - __m256i a0, __m256i b0, - __m256i a1, __m256i b1) { - -# if defined (USE_VNNI) -# if defined (USE_INLINE_ASM) - asm( - VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t" - VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t" - : [acc]"+v"(acc) - : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) - ); -# else - acc = _mm256_dpbusd_epi32(acc, a0, b0); - acc = _mm256_dpbusd_epi32(acc, a1, b1); -# endif -# else -# if defined (USE_INLINE_ASM) - __m256i tmp0 = _mm256_maddubs_epi16(a0, b0); - __m256i tmp1 = _mm256_maddubs_epi16(a1, b1); - asm( - "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" - "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" - "vpaddd %[acc], %[tmp0], %[acc]\n\t" - : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) - : [tmp1]"v"(tmp1), [ones]"v"(_mm256_set1_epi16(1)) - ); -# else - __m256i product0 = _mm256_maddubs_epi16(a0, b0); - __m256i product1 = _mm256_maddubs_epi16(a1, b1); - product0 = _mm256_adds_epi16(product0, product1); - product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); - acc = _mm256_add_epi32(acc, product0); -# endif -# endif - } - -#endif - -#if defined (USE_SSSE3) - - [[maybe_unused]] static int m128_hadd(__m128i sum, int bias) { - sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC - sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB - return _mm_cvtsi128_si32(sum) + bias; - } - - [[maybe_unused]] static __m128i m128_haddx4( - __m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3, - __m128i bias) { - - sum0 = _mm_hadd_epi32(sum0, sum1); - sum2 = _mm_hadd_epi32(sum2, sum3); - sum0 = _mm_hadd_epi32(sum0, sum2); - return _mm_add_epi32(sum0, bias); - } - - [[maybe_unused]] static void m128_add_dpbusd_epi32( - __m128i& acc, - __m128i a, - __m128i b) { - -# if defined (USE_INLINE_ASM) - __m128i tmp = _mm_maddubs_epi16(a, b); - asm( - "pmaddwd %[ones], %[tmp]\n\t" - "paddd %[tmp], %[acc]\n\t" - : [acc]"+v"(acc), [tmp]"+&v"(tmp) - : [ones]"v"(_mm_set1_epi16(1)) - ); -# else - __m128i product0 = _mm_maddubs_epi16(a, b); - product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); - acc = _mm_add_epi32(acc, product0); -# endif - } - - [[maybe_unused]] static void m128_add_dpbusd_epi32x2( - __m128i& acc, - __m128i a0, __m128i b0, - __m128i a1, __m128i b1) { - -# if defined (USE_INLINE_ASM) - __m128i tmp0 = _mm_maddubs_epi16(a0, b0); - __m128i tmp1 = _mm_maddubs_epi16(a1, b1); - asm( - "paddsw %[tmp1], %[tmp0]\n\t" - "pmaddwd %[ones], %[tmp0]\n\t" - "paddd %[tmp0], %[acc]\n\t" - : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) - : [tmp1]"v"(tmp1), [ones]"v"(_mm_set1_epi16(1)) - ); -# else - __m128i product0 = _mm_maddubs_epi16(a0, b0); - __m128i product1 = _mm_maddubs_epi16(a1, b1); - product0 = _mm_adds_epi16(product0, product1); - product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); - acc = _mm_add_epi32(acc, product0); -# endif - } - -#endif - -#if defined (USE_NEON) - - [[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { -# if USE_NEON >= 8 - return vaddvq_s32(s); -# else - return s[0] + s[1] + s[2] + s[3]; -# endif - } - - [[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) { - return neon_m128_reduce_add_epi32(sum) + bias; - } - - [[maybe_unused]] static int32x4_t neon_m128_haddx4( - int32x4_t sum0, int32x4_t sum1, int32x4_t sum2, int32x4_t sum3, - int32x4_t bias) { - - int32x4_t hsums { - neon_m128_reduce_add_epi32(sum0), - neon_m128_reduce_add_epi32(sum1), - neon_m128_reduce_add_epi32(sum2), - neon_m128_reduce_add_epi32(sum3) - }; - return vaddq_s32(hsums, bias); - } - - [[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2( - int32x4_t& acc, - int8x8_t a0, int8x8_t b0, - int8x8_t a1, int8x8_t b1) { - - int16x8_t product = vmull_s8(a0, b0); - product = vmlal_s8(product, a1, b1); - acc = vpadalq_s16(acc, product); - } - -#endif - -} - -#endif // STOCKFISH_SIMD_H_INCLUDED diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index a1315244..722dc9d3 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,38 +16,44 @@ along with this program. If not, see . */ +#include "tbprobe.h" + #include #include +#include #include -#include // For std::memset and std::memcpy +#include +#include #include #include +#include #include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include #include "../bitboard.h" +#include "../misc.h" #include "../movegen.h" #include "../position.h" #include "../search.h" #include "../types.h" -#include "../uci.h" - -#include "tbprobe.h" +#include "../ucioption.h" #ifndef _WIN32 -#include -#include -#include -#include + #include + #include + #include #else -#define WIN32_LEAN_AND_MEAN -#ifndef NOMINMAX -# define NOMINMAX // Disable macros min() and max() -#endif -#include + #define WIN32_LEAN_AND_MEAN + #ifndef NOMINMAX + #define NOMINMAX // Disable macros min() and max() + #endif + #include #endif using namespace Stockfish::Tablebases; @@ -58,59 +64,69 @@ namespace Stockfish { namespace { -constexpr int TBPIECES = 7; // Max number of supported pieces +constexpr int TBPIECES = 7; // Max number of supported pieces +constexpr int MAX_DTZ = + 1 << 18; // Max DTZ supported, large enough to deal with the syzygy TB limit. -enum { BigEndian, LittleEndian }; -enum TBType { WDL, DTZ }; // Used as template parameter +enum { + BigEndian, + LittleEndian +}; +enum TBType { + WDL, + DTZ +}; // Used as template parameter // Each table has a set of flags: all of them refer to DTZ tables, the last one to WDL tables -enum TBFlag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8, Wide = 16, SingleValue = 128 }; +enum TBFlag { + STM = 1, + Mapped = 2, + WinPlies = 4, + LossPlies = 8, + Wide = 16, + SingleValue = 128 +}; inline WDLScore operator-(WDLScore d) { return WDLScore(-int(d)); } -inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } +inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } -const std::string PieceToChar = " PNBRQK pnbrqk"; +constexpr std::string_view PieceToChar = " PNBRQK pnbrqk"; int MapPawns[SQUARE_NB]; int MapB1H1H7[SQUARE_NB]; int MapA1D1D4[SQUARE_NB]; -int MapKK[10][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB] +int MapKK[10][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB] -int Binomial[6][SQUARE_NB]; // [k][n] k elements from a set of n elements -int LeadPawnIdx[6][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB] -int LeadPawnsSize[6][4]; // [leadPawnsCnt][FILE_A..FILE_D] +int Binomial[6][SQUARE_NB]; // [k][n] k elements from a set of n elements +int LeadPawnIdx[6][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB] +int LeadPawnsSize[6][4]; // [leadPawnsCnt][FILE_A..FILE_D] // Comparison function to sort leading pawns in ascending MapPawns[] order bool pawns_comp(Square i, Square j) { return MapPawns[i] < MapPawns[j]; } -int off_A1H8(Square sq) { return int(rank_of(sq)) - file_of(sq); } +int off_A1H8(Square sq) { return int(rank_of(sq)) - file_of(sq); } -constexpr Value WDL_to_value[] = { - -VALUE_MATE + MAX_PLY + 1, - VALUE_DRAW - 2, - VALUE_DRAW, - VALUE_DRAW + 2, - VALUE_MATE - MAX_PLY - 1 -}; +constexpr Value WDL_to_value[] = {-VALUE_MATE + MAX_PLY + 1, VALUE_DRAW - 2, VALUE_DRAW, + VALUE_DRAW + 2, VALUE_MATE - MAX_PLY - 1}; template -inline void swap_endian(T& x) -{ - static_assert(std::is_unsigned::value, "Argument of swap_endian not unsigned"); +inline void swap_endian(T& x) { + static_assert(std::is_unsigned_v, "Argument of swap_endian not unsigned"); - uint8_t tmp, *c = (uint8_t*)&x; + uint8_t tmp, *c = (uint8_t*) &x; for (int i = 0; i < Half; ++i) tmp = c[i], c[i] = c[End - i], c[End - i] = tmp; } -template<> inline void swap_endian(uint8_t&) {} +template<> +inline void swap_endian(uint8_t&) {} -template T number(void* addr) -{ +template +T number(void* addr) { T v; - if ((uintptr_t)addr & (alignof(T) - 1)) // Unaligned pointer (very rare) + if (uintptr_t(addr) & (alignof(T) - 1)) // Unaligned pointer (very rare) std::memcpy(&v, addr, sizeof(T)); else - v = *((T*)addr); + v = *((T*) addr); if (LE != IsLittleEndian) swap_endian(v); @@ -121,18 +137,20 @@ template T number(void* addr) // like captures and pawn moves but we can easily recover the correct dtz of the // previous move if we know the position's WDL score. int dtz_before_zeroing(WDLScore wdl) { - return wdl == WDLWin ? 1 : - wdl == WDLCursedWin ? 101 : - wdl == WDLBlessedLoss ? -101 : - wdl == WDLLoss ? -1 : 0; + return wdl == WDLWin ? 1 + : wdl == WDLCursedWin ? 101 + : wdl == WDLBlessedLoss ? -101 + : wdl == WDLLoss ? -1 + : 0; } // Return the sign of a number (-1, 0, 1) -template int sign_of(T val) { +template +int sign_of(T val) { return (T(0) < val) - (val < T(0)); } -// Numbers in little endian used by sparseIndex[] to point into blockLength[] +// Numbers in little-endian used by sparseIndex[] to point into blockLength[] struct SparseEntry { char block[4]; // Number of block char offset[2]; // Offset within the block @@ -140,18 +158,22 @@ struct SparseEntry { static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes"); -typedef uint16_t Sym; // Huffman symbol +using Sym = uint16_t; // Huffman symbol struct LR { - enum Side { Left, Right }; + enum Side { + Left, + Right + }; - uint8_t lr[3]; // The first 12 bits is the left-hand symbol, the second 12 - // bits is the right-hand symbol. If symbol has length 1, - // then the left-hand symbol is the stored value. + uint8_t lr[3]; // The first 12 bits is the left-hand symbol, the second 12 + // bits is the right-hand symbol. If the symbol has length 1, + // then the left-hand symbol is the stored value. template Sym get() { - return S == Left ? ((lr[1] & 0xF) << 8) | lr[0] : - S == Right ? (lr[2] << 4) | (lr[1] >> 4) : (assert(false), Sym(-1)); + return S == Left ? ((lr[1] & 0xF) << 8) | lr[0] + : S == Right ? (lr[2] << 4) | (lr[1] >> 4) + : (assert(false), Sym(-1)); } }; @@ -166,11 +188,11 @@ static_assert(sizeof(LR) == 3, "LR tree entry must be 3 bytes"); // class TBFile memory maps/unmaps the single .rtbw and .rtbz files. Files are // memory mapped for best performance. Files are mapped at first access: at init // time only existence of the file is checked. -class TBFile : public std::ifstream { +class TBFile: public std::ifstream { std::string fname; -public: + public: // Look for and open the file among the Paths directories where the .rtbw // and .rtbz files can be found. Multiple directories are separated by ";" // on Windows and by ":" on Unix-based operating systems. @@ -187,7 +209,7 @@ public: constexpr char SepChar = ';'; #endif std::stringstream ss(Paths); - std::string path; + std::string path; while (std::getline(ss, path, SepChar)) { @@ -198,17 +220,14 @@ public: } } - // Memory map the file and check it. File should be already open and will be - // closed after mapping. + // Memory map the file and check it. uint8_t* map(void** baseAddress, uint64_t* mapping, TBType type) { - - assert(is_open()); - - close(); // Need to re-open to get native file descriptor + if (is_open()) + close(); // Need to re-open to get native file descriptor #ifndef _WIN32 struct stat statbuf; - int fd = ::open(fname.c_str(), O_RDONLY); + int fd = ::open(fname.c_str(), O_RDONLY); if (fd == -1) return *baseAddress = nullptr, nullptr; @@ -221,11 +240,11 @@ public: exit(EXIT_FAILURE); } - *mapping = statbuf.st_size; + *mapping = statbuf.st_size; *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); -#if defined(MADV_RANDOM) + #if defined(MADV_RANDOM) madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); -#endif + #endif ::close(fd); if (*baseAddress == MAP_FAILED) @@ -235,8 +254,8 @@ public: } #else // Note FILE_FLAG_RANDOM_ACCESS is only a hint to Windows and as such may get ignored. - HANDLE fd = CreateFile(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, - OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr); + HANDLE fd = CreateFileA(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr); if (fd == INVALID_HANDLE_VALUE) return *baseAddress = nullptr, nullptr; @@ -259,7 +278,7 @@ public: exit(EXIT_FAILURE); } - *mapping = (uint64_t)mmap; + *mapping = uint64_t(mmap); *baseAddress = MapViewOfFile(mmap, FILE_MAP_READ, 0, 0, 0); if (!*baseAddress) @@ -269,10 +288,9 @@ public: exit(EXIT_FAILURE); } #endif - uint8_t* data = (uint8_t*)*baseAddress; + uint8_t* data = (uint8_t*) *baseAddress; - constexpr uint8_t Magics[][4] = { { 0xD7, 0x66, 0x0C, 0xA5 }, - { 0x71, 0xE8, 0x23, 0x5D } }; + constexpr uint8_t Magics[][4] = {{0xD7, 0x66, 0x0C, 0xA5}, {0x71, 0xE8, 0x23, 0x5D}}; if (memcmp(data, Magics[type == WDL], 4)) { @@ -281,7 +299,7 @@ public: return *baseAddress = nullptr, nullptr; } - return data + 4; // Skip Magics's header + return data + 4; // Skip Magics's header } static void unmap(void* baseAddress, uint64_t mapping) { @@ -290,36 +308,38 @@ public: munmap(baseAddress, mapping); #else UnmapViewOfFile(baseAddress); - CloseHandle((HANDLE)mapping); + CloseHandle((HANDLE) mapping); #endif } }; std::string TBFile::Paths; -// struct PairsData contains low level indexing information to access TB data. -// There are 8, 4 or 2 PairsData records for each TBTable, according to type of -// table and if positions have pawns or not. It is populated at first access. +// struct PairsData contains low-level indexing information to access TB data. +// There are 8, 4, or 2 PairsData records for each TBTable, according to the type +// of table and if positions have pawns or not. It is populated at first access. struct PairsData { - uint8_t flags; // Table flags, see enum TBFlag - uint8_t maxSymLen; // Maximum length in bits of the Huffman symbols - uint8_t minSymLen; // Minimum length in bits of the Huffman symbols - uint32_t blocksNum; // Number of blocks in the TB file - size_t sizeofBlock; // Block size in bytes - size_t span; // About every span values there is a SparseIndex[] entry - Sym* lowestSym; // lowestSym[l] is the symbol of length l with the lowest value - LR* btree; // btree[sym] stores the left and right symbols that expand sym - uint16_t* blockLength; // Number of stored positions (minus one) for each block: 1..65536 - uint32_t blockLengthSize; // Size of blockLength[] table: padded so it's bigger than blocksNum - SparseEntry* sparseIndex; // Partial indices into blockLength[] - size_t sparseIndexSize; // Size of SparseIndex[] table - uint8_t* data; // Start of Huffman compressed data - std::vector base64; // base64[l - min_sym_len] is the 64bit-padded lowest symbol of length l - std::vector symlen; // Number of values (-1) represented by a given Huffman symbol: 1..256 - Piece pieces[TBPIECES]; // Position pieces: the order of pieces defines the groups - uint64_t groupIdx[TBPIECES+1]; // Start index used for the encoding of the group's pieces - int groupLen[TBPIECES+1]; // Number of pieces in a given group: KRKN -> (3, 1) - uint16_t map_idx[4]; // WDLWin, WDLLoss, WDLCursedWin, WDLBlessedLoss (used in DTZ) + uint8_t flags; // Table flags, see enum TBFlag + uint8_t maxSymLen; // Maximum length in bits of the Huffman symbols + uint8_t minSymLen; // Minimum length in bits of the Huffman symbols + uint32_t blocksNum; // Number of blocks in the TB file + size_t sizeofBlock; // Block size in bytes + size_t span; // About every span values there is a SparseIndex[] entry + Sym* lowestSym; // lowestSym[l] is the symbol of length l with the lowest value + LR* btree; // btree[sym] stores the left and right symbols that expand sym + uint16_t* blockLength; // Number of stored positions (minus one) for each block: 1..65536 + uint32_t blockLengthSize; // Size of blockLength[] table: padded so it's bigger than blocksNum + SparseEntry* sparseIndex; // Partial indices into blockLength[] + size_t sparseIndexSize; // Size of SparseIndex[] table + uint8_t* data; // Start of Huffman compressed data + std::vector + base64; // base64[l - min_sym_len] is the 64bit-padded lowest symbol of length l + std::vector + symlen; // Number of values (-1) represented by a given Huffman symbol: 1..256 + Piece pieces[TBPIECES]; // Position pieces: the order of pieces defines the groups + uint64_t groupIdx[TBPIECES + 1]; // Start index used for the encoding of the group's pieces + int groupLen[TBPIECES + 1]; // Number of pieces in a given group: KRKN -> (3, 1) + uint16_t map_idx[4]; // WDLWin, WDLLoss, WDLCursedWin, WDLBlessedLoss (used in DTZ) }; // struct TBTable contains indexing information to access the corresponding TBFile. @@ -328,27 +348,27 @@ struct PairsData { // first access, when the corresponding file is memory mapped. template struct TBTable { - typedef typename std::conditional::type Ret; + using Ret = std::conditional_t; static constexpr int Sides = Type == WDL ? 2 : 1; std::atomic_bool ready; - void* baseAddress; - uint8_t* map; - uint64_t mapping; - Key key; - Key key2; - int pieceCount; - bool hasPawns; - bool hasUniquePieces; - uint8_t pawnCount[2]; // [Lead color / other color] - PairsData items[Sides][4]; // [wtm / btm][FILE_A..FILE_D or 0] + void* baseAddress; + uint8_t* map; + uint64_t mapping; + Key key; + Key key2; + int pieceCount; + bool hasPawns; + bool hasUniquePieces; + uint8_t pawnCount[2]; // [Lead color / other color] + PairsData items[Sides][4]; // [wtm / btm][FILE_A..FILE_D or 0] - PairsData* get(int stm, int f) { - return &items[stm % Sides][hasPawns ? f : 0]; - } + PairsData* get(int stm, int f) { return &items[stm % Sides][hasPawns ? f : 0]; } - TBTable() : ready(false), baseAddress(nullptr) {} + TBTable() : + ready(false), + baseAddress(nullptr) {} explicit TBTable(const std::string& code); explicit TBTable(const TBTable& wdl); @@ -359,26 +379,26 @@ struct TBTable { }; template<> -TBTable::TBTable(const std::string& code) : TBTable() { +TBTable::TBTable(const std::string& code) : + TBTable() { StateInfo st; - Position pos; + Position pos; - key = pos.set(code, WHITE, &st).material_key(); + key = pos.set(code, WHITE, &st).material_key(); pieceCount = pos.count(); - hasPawns = pos.pieces(PAWN); + hasPawns = pos.pieces(PAWN); hasUniquePieces = false; - for (Color c : { WHITE, BLACK }) + for (Color c : {WHITE, BLACK}) for (PieceType pt = PAWN; pt < KING; ++pt) if (popcount(pos.pieces(c, pt)) == 1) hasUniquePieces = true; // Set the leading color. In case both sides have pawns the leading color - // is the side with less pawns because this leads to better compression. - bool c = !pos.count(BLACK) - || ( pos.count(WHITE) - && pos.count(BLACK) >= pos.count(WHITE)); + // is the side with fewer pawns because this leads to better compression. + bool c = !pos.count(BLACK) + || (pos.count(WHITE) && pos.count(BLACK) >= pos.count(WHITE)); pawnCount[0] = pos.count(c ? WHITE : BLACK); pawnCount[1] = pos.count(c ? BLACK : WHITE); @@ -387,36 +407,36 @@ TBTable::TBTable(const std::string& code) : TBTable() { } template<> -TBTable::TBTable(const TBTable& wdl) : TBTable() { +TBTable::TBTable(const TBTable& wdl) : + TBTable() { // Use the corresponding WDL table to avoid recalculating all from scratch - key = wdl.key; - key2 = wdl.key2; - pieceCount = wdl.pieceCount; - hasPawns = wdl.hasPawns; + key = wdl.key; + key2 = wdl.key2; + pieceCount = wdl.pieceCount; + hasPawns = wdl.hasPawns; hasUniquePieces = wdl.hasUniquePieces; - pawnCount[0] = wdl.pawnCount[0]; - pawnCount[1] = wdl.pawnCount[1]; + pawnCount[0] = wdl.pawnCount[0]; + pawnCount[1] = wdl.pawnCount[1]; } // class TBTables creates and keeps ownership of the TBTable objects, one for -// each TB file found. It supports a fast, hash based, table lookup. Populated +// each TB file found. It supports a fast, hash-based, table lookup. Populated // at init time, accessed at probe time. class TBTables { - struct Entry - { - Key key; + struct Entry { + Key key; TBTable* wdl; TBTable* dtz; - template + template TBTable* get() const { - return (TBTable*)(Type == WDL ? (void*)wdl : (void*)dtz); + return (TBTable*) (Type == WDL ? (void*) wdl : (void*) dtz); } }; - static constexpr int Size = 1 << 12; // 4K table, indexed by key's 12 lsb + static constexpr int Size = 1 << 12; // 4K table, indexed by key's 12 lsb static constexpr int Overflow = 1; // Number of elements allowed to map to the last bucket Entry hashTable[Size + Overflow]; @@ -425,23 +445,26 @@ class TBTables { std::deque> dtzTable; void insert(Key key, TBTable* wdl, TBTable* dtz) { - uint32_t homeBucket = (uint32_t)key & (Size - 1); - Entry entry{ key, wdl, dtz }; + uint32_t homeBucket = uint32_t(key) & (Size - 1); + Entry entry{key, wdl, dtz}; // Ensure last element is empty to avoid overflow when looking up - for (uint32_t bucket = homeBucket; bucket < Size + Overflow - 1; ++bucket) { + for (uint32_t bucket = homeBucket; bucket < Size + Overflow - 1; ++bucket) + { Key otherKey = hashTable[bucket].key; - if (otherKey == key || !hashTable[bucket].get()) { + if (otherKey == key || !hashTable[bucket].get()) + { hashTable[bucket] = entry; return; } // Robin Hood hashing: If we've probed for longer than this element, // insert here and search for a new spot for the other element instead. - uint32_t otherHomeBucket = (uint32_t)otherKey & (Size - 1); - if (otherHomeBucket > homeBucket) { + uint32_t otherHomeBucket = uint32_t(otherKey) & (Size - 1); + if (otherHomeBucket > homeBucket) + { std::swap(entry, hashTable[bucket]); - key = otherKey; + key = otherKey; homeBucket = otherHomeBucket; } } @@ -449,10 +472,11 @@ class TBTables { exit(EXIT_FAILURE); } -public: + public: template TBTable* get(Key key) { - for (const Entry* entry = &hashTable[(uint32_t)key & (Size - 1)]; ; ++entry) { + for (const Entry* entry = &hashTable[uint32_t(key) & (Size - 1)];; ++entry) + { if (entry->key == key || !entry->get()) return entry->get(); } @@ -464,7 +488,7 @@ public: dtzTable.clear(); } size_t size() const { return wdlTable.size(); } - void add(const std::vector& pieces); + void add(const std::vector& pieces); }; TBTables TBTables; @@ -478,20 +502,20 @@ void TBTables::add(const std::vector& pieces) { for (PieceType pt : pieces) code += PieceToChar[pt]; - TBFile file(code.insert(code.find('K', 1), "v") + ".rtbw"); // KRK -> KRvK + TBFile file(code.insert(code.find('K', 1), "v") + ".rtbw"); // KRK -> KRvK - if (!file.is_open()) // Only WDL file is checked + if (!file.is_open()) // Only WDL file is checked return; file.close(); - MaxCardinality = std::max((int)pieces.size(), MaxCardinality); + MaxCardinality = std::max(int(pieces.size()), MaxCardinality); wdlTable.emplace_back(code); dtzTable.emplace_back(wdlTable.back()); // Insert into the hash keys for both colors: KRvK with KR white and black - insert(wdlTable.back().key , &wdlTable.back(), &dtzTable.back()); + insert(wdlTable.back().key, &wdlTable.back(), &dtzTable.back()); insert(wdlTable.back().key2, &wdlTable.back(), &dtzTable.back()); } @@ -507,9 +531,9 @@ void TBTables::add(const std::vector& pieces) { // mostly-draw or mostly-win tables this can leave many 64-byte blocks only half-filled, so // in such cases blocks are 32 bytes long. The blocks of DTZ tables are up to 1024 bytes long. // The generator picks the size that leads to the smallest table. The "book" of symbols and -// Huffman codes is the same for all blocks in the table. A non-symmetric pawnless TB file +// Huffman codes are the same for all blocks in the table. A non-symmetric pawnless TB file // will have one table for wtm and one for btm, a TB file with pawns will have tables per -// file a,b,c,d also in this case one set for wtm and one for btm. +// file a,b,c,d also, in this case, one set for wtm and one for btm. int decompress_pairs(PairsData* d, uint64_t idx) { // Special case where all table positions store the same value @@ -534,10 +558,10 @@ int decompress_pairs(PairsData* d, uint64_t idx) { uint32_t k = uint32_t(idx / d->span); // Then we read the corresponding SparseIndex[] entry - uint32_t block = number(&d->sparseIndex[k].block); - int offset = number(&d->sparseIndex[k].offset); + uint32_t block = number(&d->sparseIndex[k].block); + int offset = number(&d->sparseIndex[k].offset); - // Now compute the difference idx - I(k). From definition of k we know that + // Now compute the difference idx - I(k). From the definition of k, we know that // // idx = k * d->span + idx % d->span (2) // @@ -547,7 +571,7 @@ int decompress_pairs(PairsData* d, uint64_t idx) { // Sum the above to offset to find the offset corresponding to our idx offset += diff; - // Move to previous/next block, until we reach the correct block that contains idx, + // Move to the previous/next block, until we reach the correct block that contains idx, // that is when 0 <= offset <= d->blockLength[block] while (offset < 0) offset += d->blockLength[--block] + 1; @@ -556,18 +580,19 @@ int decompress_pairs(PairsData* d, uint64_t idx) { offset -= d->blockLength[block++] + 1; // Finally, we find the start address of our block of canonical Huffman symbols - uint32_t* ptr = (uint32_t*)(d->data + ((uint64_t)block * d->sizeofBlock)); + uint32_t* ptr = (uint32_t*) (d->data + (uint64_t(block) * d->sizeofBlock)); // Read the first 64 bits in our block, this is a (truncated) sequence of // unknown number of symbols of unknown length but we know the first one - // is at the beginning of this 64 bits sequence. - uint64_t buf64 = number(ptr); ptr += 2; + // is at the beginning of this 64-bit sequence. + uint64_t buf64 = number(ptr); + ptr += 2; int buf64Size = 64; Sym sym; while (true) { - int len = 0; // This is the symbol length - d->min_sym_len + int len = 0; // This is the symbol length - d->min_sym_len // Now get the symbol length. For any symbol s64 of length l right-padded // to 64 bits we know that d->base64[l-1] >= s64 >= d->base64[l] so we @@ -583,24 +608,25 @@ int decompress_pairs(PairsData* d, uint64_t idx) { // Now add the value of the lowest symbol of length len to get our symbol sym += number(&d->lowestSym[len]); - // If our offset is within the number of values represented by symbol sym - // we are done... + // If our offset is within the number of values represented by symbol sym, + // we are done. if (offset < d->symlen[sym] + 1) break; // ...otherwise update the offset and continue to iterate offset -= d->symlen[sym] + 1; - len += d->minSymLen; // Get the real length - buf64 <<= len; // Consume the just processed symbol + len += d->minSymLen; // Get the real length + buf64 <<= len; // Consume the just processed symbol buf64Size -= len; - if (buf64Size <= 32) { // Refill the buffer + if (buf64Size <= 32) + { // Refill the buffer buf64Size += 32; - buf64 |= (uint64_t)number(ptr++) << (64 - buf64Size); + buf64 |= uint64_t(number(ptr++)) << (64 - buf64Size); } } - // Ok, now we have our symbol that expands into d->symlen[sym] + 1 symbols. + // Now we have our symbol that expands into d->symlen[sym] + 1 symbols. // We binary-search for our value recursively expanding into the left and // right child symbols until we reach a leaf node where symlen[sym] + 1 == 1 // that will store the value we need. @@ -610,11 +636,12 @@ int decompress_pairs(PairsData* d, uint64_t idx) { // If a symbol contains 36 sub-symbols (d->symlen[sym] + 1 = 36) and // expands in a pair (d->symlen[left] = 23, d->symlen[right] = 11), then - // we know that, for instance the ten-th value (offset = 10) will be on + // we know that, for instance, the tenth value (offset = 10) will be on // the left side because in Recursive Pairing child symbols are adjacent. if (offset < d->symlen[left] + 1) sym = left; - else { + else + { offset -= d->symlen[left] + 1; sym = d->btree[sym].get(); } @@ -628,68 +655,79 @@ bool check_dtz_stm(TBTable*, int, File) { return true; } bool check_dtz_stm(TBTable* entry, int stm, File f) { auto flags = entry->get(stm, f)->flags; - return (flags & TBFlag::STM) == stm - || ((entry->key == entry->key2) && !entry->hasPawns); + return (flags & TBFlag::STM) == stm || ((entry->key == entry->key2) && !entry->hasPawns); } // DTZ scores are sorted by frequency of occurrence and then assigned the // values 0, 1, 2, ... in order of decreasing frequency. This is done for each // of the four WDLScore values. The mapping information necessary to reconstruct -// the original values is stored in the TB file and read during map[] init. +// the original values are stored in the TB file and read during map[] init. WDLScore map_score(TBTable*, File, int value, WDLScore) { return WDLScore(value - 2); } int map_score(TBTable* entry, File f, int value, WDLScore wdl) { - constexpr int WDLMap[] = { 1, 3, 0, 2, 0 }; + constexpr int WDLMap[] = {1, 3, 0, 2, 0}; auto flags = entry->get(0, f)->flags; - uint8_t* map = entry->map; + uint8_t* map = entry->map; uint16_t* idx = entry->get(0, f)->map_idx; - if (flags & TBFlag::Mapped) { + if (flags & TBFlag::Mapped) + { if (flags & TBFlag::Wide) - value = ((uint16_t *)map)[idx[WDLMap[wdl + 2]] + value]; + value = ((uint16_t*) map)[idx[WDLMap[wdl + 2]] + value]; else value = map[idx[WDLMap[wdl + 2]] + value]; } // DTZ tables store distance to zero in number of moves or plies. We - // want to return plies, so we have convert to plies when needed. - if ( (wdl == WDLWin && !(flags & TBFlag::WinPlies)) - || (wdl == WDLLoss && !(flags & TBFlag::LossPlies)) - || wdl == WDLCursedWin - || wdl == WDLBlessedLoss) + // want to return plies, so we have to convert to plies when needed. + if ((wdl == WDLWin && !(flags & TBFlag::WinPlies)) + || (wdl == WDLLoss && !(flags & TBFlag::LossPlies)) || wdl == WDLCursedWin + || wdl == WDLBlessedLoss) value *= 2; return value + 1; } +// A temporary fix for the compiler bug with AVX-512. (#4450) +#ifdef USE_AVX512 + #if defined(__clang__) && defined(__clang_major__) && __clang_major__ >= 15 + #define CLANG_AVX512_BUG_FIX __attribute__((optnone)) + #endif +#endif + +#ifndef CLANG_AVX512_BUG_FIX + #define CLANG_AVX512_BUG_FIX +#endif + // Compute a unique index out of a position and use it to probe the TB file. To -// encode k pieces of same type and color, first sort the pieces by square in +// encode k pieces of the same type and color, first sort the pieces by square in // ascending order s1 <= s2 <= ... <= sk then compute the unique index as: // // idx = Binomial[1][s1] + Binomial[2][s2] + ... + Binomial[k][sk] // template -Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* result) { +CLANG_AVX512_BUG_FIX Ret +do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* result) { - Square squares[TBPIECES]; - Piece pieces[TBPIECES]; - uint64_t idx; - int next = 0, size = 0, leadPawnsCnt = 0; + Square squares[TBPIECES]; + Piece pieces[TBPIECES]; + uint64_t idx; + int next = 0, size = 0, leadPawnsCnt = 0; PairsData* d; - Bitboard b, leadPawns = 0; - File tbFile = FILE_A; + Bitboard b, leadPawns = 0; + File tbFile = FILE_A; // A given TB entry like KRK has associated two material keys: KRvk and Kvkr. // If both sides have the same pieces keys are equal. In this case TB tables - // only store the 'white to move' case, so if the position to lookup has black + // only stores the 'white to move' case, so if the position to lookup has black // to move, we need to switch the color and flip the squares before to lookup. bool symmetricBlackToMove = (entry->key == entry->key2 && pos.side_to_move()); - // TB files are calculated for white as stronger side. For instance we have - // KRvK, not KvKR. A position where stronger side is white will have its - // material key == entry->key, otherwise we have to switch the color and + // TB files are calculated for white as the stronger side. For instance, we + // have KRvK, not KvKR. A position where the stronger side is white will have + // its material key == entry->key, otherwise we have to switch the color and // flip the squares before to lookup. bool blackStronger = (pos.material_key() != entry->key); @@ -700,7 +738,8 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // For pawns, TB files store 4 separate tables according if leading pawn is on // file a, b, c or d after reordering. The leading pawn is the one with maximum // MapPawns[] value, that is the one most toward the edges and with lowest rank. - if (entry->hasPawns) { + if (entry->hasPawns) + { // In all the 4 tables, pawns are at the beginning of the piece sequence and // their color is the reference one. So we just pick the first one. @@ -729,9 +768,10 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // Now we are ready to get all the position pieces (but the lead pawns) and // directly map them to the correct color and square. b = pos.pieces() ^ leadPawns; - do { - Square s = pop_lsb(b); - squares[size] = s ^ flipSquares; + do + { + Square s = pop_lsb(b); + squares[size] = s ^ flipSquares; pieces[size++] = Piece(pos.piece_on(s) ^ flipColor); } while (b); @@ -758,7 +798,8 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // Encode leading pawns starting with the one with minimum MapPawns[] and // proceeding in ascending order. - if (entry->hasPawns) { + if (entry->hasPawns) + { idx = LeadPawnIdx[leadPawnsCnt][squares[0]]; std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp); @@ -766,7 +807,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu for (int i = 1; i < leadPawnsCnt; ++i) idx += Binomial[i][MapPawns[squares[i]]]; - goto encode_remaining; // With pawns we have finished special treatments + goto encode_remaining; // With pawns we have finished special treatments } // In positions without pawns, we further flip the squares to ensure leading @@ -777,11 +818,12 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // Look for the first piece of the leading group not on the A1-D4 diagonal // and ensure it is mapped below the diagonal. - for (int i = 0; i < d->groupLen[0]; ++i) { + for (int i = 0; i < d->groupLen[0]; ++i) + { if (!off_A1H8(squares[i])) continue; - if (off_A1H8(squares[i]) > 0) // A1-H8 diagonal flip: SQ_A3 -> SQ_C1 + if (off_A1H8(squares[i]) > 0) // A1-H8 diagonal flip: SQ_A3 -> SQ_C1 for (int j = i; j < size; ++j) squares[j] = Square(((squares[j] >> 3) | (squares[j] << 3)) & 63); break; @@ -812,43 +854,38 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be // swapped and still get the same position.) // - // In case we have at least 3 unique pieces (included kings) we encode them + // In case we have at least 3 unique pieces (including kings) we encode them // together. - if (entry->hasUniquePieces) { + if (entry->hasUniquePieces) + { - int adjust1 = squares[1] > squares[0]; + int adjust1 = squares[1] > squares[0]; int adjust2 = (squares[2] > squares[0]) + (squares[2] > squares[1]); // First piece is below a1-h8 diagonal. MapA1D1D4[] maps the b1-d1-d3 - // triangle to 0...5. There are 63 squares for second piece and and 62 + // triangle to 0...5. There are 63 squares for second piece and 62 // (mapped to 0...61) for the third. if (off_A1H8(squares[0])) - idx = ( MapA1D1D4[squares[0]] * 63 - + (squares[1] - adjust1)) * 62 - + squares[2] - adjust2; + idx = (MapA1D1D4[squares[0]] * 63 + (squares[1] - adjust1)) * 62 + squares[2] - adjust2; // First piece is on a1-h8 diagonal, second below: map this occurrence to // 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal // to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27. else if (off_A1H8(squares[1])) - idx = ( 6 * 63 + rank_of(squares[0]) * 28 - + MapB1H1H7[squares[1]]) * 62 - + squares[2] - adjust2; + idx = (6 * 63 + rank_of(squares[0]) * 28 + MapB1H1H7[squares[1]]) * 62 + squares[2] + - adjust2; // First two pieces are on a1-h8 diagonal, third below else if (off_A1H8(squares[2])) - idx = 6 * 63 * 62 + 4 * 28 * 62 - + rank_of(squares[0]) * 7 * 28 - + (rank_of(squares[1]) - adjust1) * 28 - + MapB1H1H7[squares[2]]; + idx = 6 * 63 * 62 + 4 * 28 * 62 + rank_of(squares[0]) * 7 * 28 + + (rank_of(squares[1]) - adjust1) * 28 + MapB1H1H7[squares[2]]; // All 3 pieces on the diagonal a1-h8 else - idx = 6 * 63 * 62 + 4 * 28 * 62 + 4 * 7 * 28 - + rank_of(squares[0]) * 7 * 6 - + (rank_of(squares[1]) - adjust1) * 6 - + (rank_of(squares[2]) - adjust2); - } else + idx = 6 * 63 * 62 + 4 * 28 * 62 + 4 * 7 * 28 + rank_of(squares[0]) * 7 * 6 + + (rank_of(squares[1]) - adjust1) * 6 + (rank_of(squares[2]) - adjust2); + } + else // We don't have at least 3 unique pieces, like in KRRvKBB, just map // the kings. idx = MapKK[MapA1D1D4[squares[0]]][squares[1]]; @@ -857,7 +894,7 @@ encode_remaining: idx *= d->groupIdx[0]; Square* groupSq = squares + d->groupLen[0]; - // Encode remaining pawns then pieces according to square, in ascending order + // Encode remaining pawns and then pieces according to square, in ascending order bool remainingPawns = entry->hasPawns && entry->pawnCount[1]; while (d->groupLen[++next]) @@ -866,10 +903,10 @@ encode_remaining: uint64_t n = 0; // Map down a square if "comes later" than a square in the previous - // groups (similar to what done earlier for leading group pieces). + // groups (similar to what was done earlier for leading group pieces). for (int i = 0; i < d->groupLen[next]; ++i) { - auto f = [&](Square s) { return groupSq[i] > s; }; + auto f = [&](Square s) { return groupSq[i] > s; }; auto adjust = std::count_if(squares, groupSq, f); n += Binomial[i + 1][groupSq[i] - adjust - 8 * remainingPawns]; } @@ -884,7 +921,7 @@ encode_remaining: } // Group together pieces that will be encoded together. The general rule is that -// a group contains pieces of same type and color. The exception is the leading +// a group contains pieces of the same type and color. The exception is the leading // group that, in case of positions without pawns, can be formed by 3 different // pieces (default) or by the king pair when there is not a unique piece apart // from the kings. When there are pawns, pawns are always first in pieces[]. @@ -907,7 +944,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) { else d->groupLen[++n] = 1; - d->groupLen[++n] = 0; // Zero-terminated + d->groupLen[++n] = 0; // Zero-terminated // The sequence in pieces[] defines the groups, but not the order in which // they are encoded. If the pieces in a group g can be combined on the board @@ -920,24 +957,23 @@ void set_groups(T& e, PairsData* d, int order[], File f) { // pawns/pieces -> remaining pawns -> remaining pieces. In particular the // first group is at order[0] position and the remaining pawns, when present, // are at order[1] position. - bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides - int next = pp ? 2 : 1; - int freeSquares = 64 - d->groupLen[0] - (pp ? d->groupLen[1] : 0); - uint64_t idx = 1; + bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides + int next = pp ? 2 : 1; + int freeSquares = 64 - d->groupLen[0] - (pp ? d->groupLen[1] : 0); + uint64_t idx = 1; for (int k = 0; next < n || k == order[0] || k == order[1]; ++k) - if (k == order[0]) // Leading pawns or pieces + if (k == order[0]) // Leading pawns or pieces { d->groupIdx[0] = idx; - idx *= e.hasPawns ? LeadPawnsSize[d->groupLen[0]][f] - : e.hasUniquePieces ? 31332 : 462; + idx *= e.hasPawns ? LeadPawnsSize[d->groupLen[0]][f] : e.hasUniquePieces ? 31332 : 462; } - else if (k == order[1]) // Remaining pawns + else if (k == order[1]) // Remaining pawns { d->groupIdx[1] = idx; idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]]; } - else // Remaining pieces + else // Remaining pieces { d->groupIdx[next] = idx; idx *= Binomial[d->groupLen[next]][freeSquares]; @@ -949,11 +985,11 @@ void set_groups(T& e, PairsData* d, int order[], File f) { // In Recursive Pairing each symbol represents a pair of children symbols. So // read d->btree[] symbols data and expand each one in his left and right child -// symbol until reaching the leafs that represent the symbol value. +// symbol until reaching the leaves that represent the symbol value. uint8_t set_symlen(PairsData* d, Sym s, std::vector& visited) { - visited[s] = true; // We can set it now because tree is acyclic - Sym sr = d->btree[s].get(); + visited[s] = true; // We can set it now because tree is acyclic + Sym sr = d->btree[s].get(); if (sr == 0xFFF) return 0; @@ -973,10 +1009,11 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data) { d->flags = *data++; - if (d->flags & TBFlag::SingleValue) { + if (d->flags & TBFlag::SingleValue) + { d->blocksNum = d->blockLengthSize = 0; - d->span = d->sparseIndexSize = 0; // Broken MSVC zero-init - d->minSymLen = *data++; // Here we store the single value + d->span = d->sparseIndexSize = 0; // Broken MSVC zero-init + d->minSymLen = *data++; // Here we store the single value return data; } @@ -984,50 +1021,60 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data) { // element stores the biggest index that is the tb size. uint64_t tbSize = d->groupIdx[std::find(d->groupLen, d->groupLen + 7, 0) - d->groupLen]; - d->sizeofBlock = 1ULL << *data++; - d->span = 1ULL << *data++; - d->sparseIndexSize = size_t((tbSize + d->span - 1) / d->span); // Round up - auto padding = number(data++); - d->blocksNum = number(data); data += sizeof(uint32_t); - d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[] - // does not point out of range. + d->sizeofBlock = 1ULL << *data++; + d->span = 1ULL << *data++; + d->sparseIndexSize = size_t((tbSize + d->span - 1) / d->span); // Round up + auto padding = number(data++); + d->blocksNum = number(data); + data += sizeof(uint32_t); + d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[] + // does not point out of range. d->maxSymLen = *data++; d->minSymLen = *data++; - d->lowestSym = (Sym*)data; + d->lowestSym = (Sym*) data; d->base64.resize(d->maxSymLen - d->minSymLen + 1); + // See https://en.wikipedia.org/wiki/Huffman_coding // The canonical code is ordered such that longer symbols (in terms of - // the number of bits of their Huffman code) have lower numeric value, + // the number of bits of their Huffman code) have a lower numeric value, // so that d->lowestSym[i] >= d->lowestSym[i+1] (when read as LittleEndian). // Starting from this we compute a base64[] table indexed by symbol length // and containing 64 bit values so that d->base64[i] >= d->base64[i+1]. - // See https://en.wikipedia.org/wiki/Huffman_coding - for (int i = d->base64.size() - 2; i >= 0; --i) { - d->base64[i] = (d->base64[i + 1] + number(&d->lowestSym[i]) - - number(&d->lowestSym[i + 1])) / 2; - assert(d->base64[i] * 2 >= d->base64[i+1]); + // Implementation note: we first cast the unsigned size_t "base64.size()" + // to a signed int "base64_size" variable and then we are able to subtract 2, + // avoiding unsigned overflow warnings. + + int base64_size = static_cast(d->base64.size()); + for (int i = base64_size - 2; i >= 0; --i) + { + d->base64[i] = (d->base64[i + 1] + number(&d->lowestSym[i]) + - number(&d->lowestSym[i + 1])) + / 2; + + assert(d->base64[i] * 2 >= d->base64[i + 1]); } // Now left-shift by an amount so that d->base64[i] gets shifted 1 bit more // than d->base64[i+1] and given the above assert condition, we ensure that // d->base64[i] >= d->base64[i+1]. Moreover for any symbol s64 of length i // and right-padded to 64 bits holds d->base64[i-1] >= s64 >= d->base64[i]. - for (size_t i = 0; i < d->base64.size(); ++i) - d->base64[i] <<= 64 - i - d->minSymLen; // Right-padding to 64 bits + for (int i = 0; i < base64_size; ++i) + d->base64[i] <<= 64 - i - d->minSymLen; // Right-padding to 64 bits - data += d->base64.size() * sizeof(Sym); - d->symlen.resize(number(data)); data += sizeof(uint16_t); - d->btree = (LR*)data; + data += base64_size * sizeof(Sym); + d->symlen.resize(number(data)); + data += sizeof(uint16_t); + d->btree = (LR*) data; // The compression scheme used is "Recursive Pairing", that replaces the most // frequent adjacent pair of symbols in the source message by a new symbol, // reevaluating the frequencies of all of the symbol pairs with respect to // the extended alphabet, and then repeating the process. - // See http://www.larsson.dogma.net/dcc99.pdf + // See https://web.archive.org/web/20201106232444/http://www.larsson.dogma.net/dcc99.pdf std::vector visited(d->symlen.size()); - for (Sym sym = 0; sym < d->symlen.size(); ++sym) + for (std::size_t sym = 0; sym < d->symlen.size(); ++sym) if (!visited[sym]) d->symlen[sym] = set_symlen(d, sym, visited); @@ -1040,67 +1087,77 @@ uint8_t* set_dtz_map(TBTable& e, uint8_t* data, File maxFile) { e.map = data; - for (File f = FILE_A; f <= maxFile; ++f) { + for (File f = FILE_A; f <= maxFile; ++f) + { auto flags = e.get(0, f)->flags; - if (flags & TBFlag::Mapped) { - if (flags & TBFlag::Wide) { - data += (uintptr_t)data & 1; // Word alignment, we may have a mixed table - for (int i = 0; i < 4; ++i) { // Sequence like 3,x,x,x,1,x,0,2,x,x - e.get(0, f)->map_idx[i] = (uint16_t)((uint16_t *)data - (uint16_t *)e.map + 1); + if (flags & TBFlag::Mapped) + { + if (flags & TBFlag::Wide) + { + data += uintptr_t(data) & 1; // Word alignment, we may have a mixed table + for (int i = 0; i < 4; ++i) + { // Sequence like 3,x,x,x,1,x,0,2,x,x + e.get(0, f)->map_idx[i] = uint16_t((uint16_t*) data - (uint16_t*) e.map + 1); data += 2 * number(data) + 2; } } - else { - for (int i = 0; i < 4; ++i) { - e.get(0, f)->map_idx[i] = (uint16_t)(data - e.map + 1); + else + { + for (int i = 0; i < 4; ++i) + { + e.get(0, f)->map_idx[i] = uint16_t(data - e.map + 1); data += *data + 1; } } } } - return data += (uintptr_t)data & 1; // Word alignment + return data += uintptr_t(data) & 1; // Word alignment } -// Populate entry's PairsData records with data from the just memory mapped file. +// Populate entry's PairsData records with data from the just memory-mapped file. // Called at first access. template void set(T& e, uint8_t* data) { PairsData* d; - enum { Split = 1, HasPawns = 2 }; + enum { + Split = 1, + HasPawns = 2 + }; - assert(e.hasPawns == bool(*data & HasPawns)); + assert(e.hasPawns == bool(*data & HasPawns)); assert((e.key != e.key2) == bool(*data & Split)); - data++; // First byte stores flags + data++; // First byte stores flags - const int sides = T::Sides == 2 && (e.key != e.key2) ? 2 : 1; + const int sides = T::Sides == 2 && (e.key != e.key2) ? 2 : 1; const File maxFile = e.hasPawns ? FILE_D : FILE_A; - bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides + bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides assert(!pp || e.pawnCount[0]); - for (File f = FILE_A; f <= maxFile; ++f) { + for (File f = FILE_A; f <= maxFile; ++f) + { for (int i = 0; i < sides; i++) *e.get(i, f) = PairsData(); - int order[][2] = { { *data & 0xF, pp ? *(data + 1) & 0xF : 0xF }, - { *data >> 4, pp ? *(data + 1) >> 4 : 0xF } }; + int order[][2] = {{*data & 0xF, pp ? *(data + 1) & 0xF : 0xF}, + {*data >> 4, pp ? *(data + 1) >> 4 : 0xF}}; data += 1 + pp; for (int k = 0; k < e.pieceCount; ++k, ++data) for (int i = 0; i < sides; i++) - e.get(i, f)->pieces[k] = Piece(i ? *data >> 4 : *data & 0xF); + e.get(i, f)->pieces[k] = Piece(i ? *data >> 4 : *data & 0xF); for (int i = 0; i < sides; ++i) set_groups(e, e.get(i, f), order[i], f); } - data += (uintptr_t)data & 1; // Word alignment + data += uintptr_t(data) & 1; // Word alignment for (File f = FILE_A; f <= maxFile; ++f) for (int i = 0; i < sides; i++) @@ -1109,28 +1166,31 @@ void set(T& e, uint8_t* data) { data = set_dtz_map(e, data, maxFile); for (File f = FILE_A; f <= maxFile; ++f) - for (int i = 0; i < sides; i++) { - (d = e.get(i, f))->sparseIndex = (SparseEntry*)data; + for (int i = 0; i < sides; i++) + { + (d = e.get(i, f))->sparseIndex = (SparseEntry*) data; data += d->sparseIndexSize * sizeof(SparseEntry); } for (File f = FILE_A; f <= maxFile; ++f) - for (int i = 0; i < sides; i++) { - (d = e.get(i, f))->blockLength = (uint16_t*)data; + for (int i = 0; i < sides; i++) + { + (d = e.get(i, f))->blockLength = (uint16_t*) data; data += d->blockLengthSize * sizeof(uint16_t); } for (File f = FILE_A; f <= maxFile; ++f) - for (int i = 0; i < sides; i++) { - data = (uint8_t*)(((uintptr_t)data + 0x3F) & ~0x3F); // 64 byte alignment + for (int i = 0; i < sides; i++) + { + data = (uint8_t*) ((uintptr_t(data) + 0x3F) & ~0x3F); // 64 byte alignment (d = e.get(i, f))->data = data; data += d->blocksNum * d->sizeofBlock; } } -// If the TB file corresponding to the given position is already memory mapped -// then return its base address, otherwise try to memory map and init it. Called -// at every probe, memory map and init only at first access. Function is thread +// If the TB file corresponding to the given position is already memory-mapped +// then return its base address, otherwise, try to memory map and init it. Called +// at every probe, memory map, and init only at first access. Function is thread // safe and can be called concurrently. template void* mapped(TBTable& e, const Position& pos) { @@ -1140,22 +1200,23 @@ void* mapped(TBTable& e, const Position& pos) { // Use 'acquire' to avoid a thread reading 'ready' == true while // another is still working. (compiler reordering may cause this). if (e.ready.load(std::memory_order_acquire)) - return e.baseAddress; // Could be nullptr if file does not exist + return e.baseAddress; // Could be nullptr if file does not exist std::scoped_lock lk(mutex); - if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock + if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock return e.baseAddress; // Pieces strings in decreasing order for each color, like ("KPP","KR") std::string fname, w, b; - for (PieceType pt = KING; pt >= PAWN; --pt) { + for (PieceType pt = KING; pt >= PAWN; --pt) + { w += std::string(popcount(pos.pieces(WHITE, pt)), PieceToChar[pt]); b += std::string(popcount(pos.pieces(BLACK, pt)), PieceToChar[pt]); } - fname = (e.key == pos.material_key() ? w + 'v' + b : b + 'v' + w) - + (Type == WDL ? ".rtbw" : ".rtbz"); + fname = + (e.key == pos.material_key() ? w + 'v' + b : b + 'v' + w) + (Type == WDL ? ".rtbw" : ".rtbz"); uint8_t* data = TBFile(fname).map(&e.baseAddress, &e.mapping, Type); @@ -1169,7 +1230,7 @@ void* mapped(TBTable& e, const Position& pos) { template::Ret> Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) { - if (pos.count() == 2) // KvK + if (pos.count() == 2) // KvK return Ret(WDLDraw); TBTable* entry = TBTables.get(pos.material_key()); @@ -1181,7 +1242,7 @@ Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) } // For a position where the side to move has a winning capture it is not necessary -// to store a winning value so the generator treats such positions as "don't cares" +// to store a winning value so the generator treats such positions as "don't care" // and tries to assign to it a value that improves the compression ratio. Similarly, // if the side to move has a drawing capture, then the position is at least drawn. // If the position is won, then the TB needs to store a win value. But if the @@ -1190,22 +1251,21 @@ Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) // their results and must probe the position itself. The "best" result of these // probes is the correct result for the position. // DTZ tables do not store values when a following move is a zeroing winning move -// (winning capture or winning pawn move). Also DTZ store wrong values for positions +// (winning capture or winning pawn move). Also, DTZ store wrong values for positions // where the best move is an ep-move (even if losing). So in all these cases set // the state to ZEROING_BEST_MOVE. template WDLScore search(Position& pos, ProbeState* result) { - WDLScore value, bestValue = WDLLoss; + WDLScore value, bestValue = WDLLoss; StateInfo st; - auto moveList = MoveList(pos); + auto moveList = MoveList(pos); size_t totalCount = moveList.size(), moveCount = 0; for (const Move move : moveList) { - if ( !pos.capture(move) - && (!CheckZeroingMoves || type_of(pos.moved_piece(move)) != PAWN)) + if (!pos.capture(move) && (!CheckZeroingMoves || type_of(pos.moved_piece(move)) != PAWN)) continue; moveCount++; @@ -1223,7 +1283,7 @@ WDLScore search(Position& pos, ProbeState* result) { if (value >= WDLWin) { - *result = ZEROING_BEST_MOVE; // Winning DTZ-zeroing move + *result = ZEROING_BEST_MOVE; // Winning DTZ-zeroing move return value; } } @@ -1249,23 +1309,22 @@ WDLScore search(Position& pos, ProbeState* result) { // DTZ stores a "don't care" value if bestValue is a win if (bestValue >= value) - return *result = ( bestValue > WDLDraw - || noMoreMoves ? ZEROING_BEST_MOVE : OK), bestValue; + return *result = (bestValue > WDLDraw || noMoreMoves ? ZEROING_BEST_MOVE : OK), bestValue; return *result = OK, value; } -} // namespace +} // namespace -/// Tablebases::init() is called at startup and after every change to -/// "SyzygyPath" UCI option to (re)create the various tables. It is not thread -/// safe, nor it needs to be. +// Called at startup and after every change to +// "SyzygyPath" UCI option to (re)create the various tables. It is not thread +// safe, nor it needs to be. void Tablebases::init(const std::string& paths) { TBTables.clear(); MaxCardinality = 0; - TBFile::Paths = paths; + TBFile::Paths = paths; if (paths.empty() || paths == "") return; @@ -1290,21 +1349,21 @@ void Tablebases::init(const std::string& paths) { for (auto s : diagonal) MapA1D1D4[s] = code++; - // MapKK[] encodes all the 461 possible legal positions of two kings where + // MapKK[] encodes all the 462 possible legal positions of two kings where // the first is in the a1-d1-d4 triangle. If the first king is on the a1-d4 - // diagonal, the other one shall not to be above the a1-h8 diagonal. + // diagonal, the other one shall not be above the a1-h8 diagonal. std::vector> bothOnDiagonal; code = 0; for (int idx = 0; idx < 10; idx++) for (Square s1 = SQ_A1; s1 <= SQ_D4; ++s1) - if (MapA1D1D4[s1] == idx && (idx || s1 == SQ_B1)) // SQ_B1 is mapped to 0 + if (MapA1D1D4[s1] == idx && (idx || s1 == SQ_B1)) // SQ_B1 is mapped to 0 { for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) if ((PseudoAttacks[KING][s1] | s1) & s2) - continue; // Illegal position + continue; // Illegal position else if (!off_A1H8(s1) && off_A1H8(s2) > 0) - continue; // First on diagonal, second above + continue; // First on diagonal, second above else if (!off_A1H8(s1) && !off_A1H8(s2)) bothOnDiagonal.emplace_back(idx, s2); @@ -1313,7 +1372,7 @@ void Tablebases::init(const std::string& paths) { MapKK[idx][s2] = code++; } - // Legal positions with both kings on diagonal are encoded as last ones + // Legal positions with both kings on a diagonal are encoded as last ones for (auto p : bothOnDiagonal) MapKK[p.first][p.second] = code++; @@ -1321,16 +1380,16 @@ void Tablebases::init(const std::string& paths) { // are Binomial[k][n] ways to choose k elements from a set of n elements. Binomial[0][0] = 1; - for (int n = 1; n < 64; n++) // Squares - for (int k = 0; k < 6 && k <= n; ++k) // Pieces - Binomial[k][n] = (k > 0 ? Binomial[k - 1][n - 1] : 0) - + (k < n ? Binomial[k ][n - 1] : 0); + for (int n = 1; n < 64; n++) // Squares + for (int k = 0; k < 6 && k <= n; ++k) // Pieces + Binomial[k][n] = + (k > 0 ? Binomial[k - 1][n - 1] : 0) + (k < n ? Binomial[k][n - 1] : 0); // MapPawns[s] encodes squares a2-h7 to 0..47. This is the number of possible // available squares when the leading one is in 's'. Moreover the pawn with - // highest MapPawns[] is the leading pawn, the one nearest the edge and, - // among pawns with same file, the one with lowest rank. - int availableSquares = 47; // Available squares when lead pawn is in a2 + // highest MapPawns[] is the leading pawn, the one nearest the edge, and + // among pawns with the same file, the one with the lowest rank. + int availableSquares = 47; // Available squares when lead pawn is in a2 // Init the tables for the encoding of leading pawns group: with 7-men TB we // can have up to 5 leading pawns (KPPPPPK). @@ -1354,7 +1413,7 @@ void Tablebases::init(const std::string& paths) { // due to mirroring: sq == a3 -> no a2, h2, so MapPawns[a3] = 45 if (leadPawnsCnt == 1) { - MapPawns[sq] = availableSquares--; + MapPawns[sq] = availableSquares--; MapPawns[flip_file(sq)] = availableSquares--; } LeadPawnIdx[leadPawnsCnt][sq] = idx; @@ -1365,20 +1424,24 @@ void Tablebases::init(const std::string& paths) { } // Add entries in TB tables if the corresponding ".rtbw" file exists - for (PieceType p1 = PAWN; p1 < KING; ++p1) { + for (PieceType p1 = PAWN; p1 < KING; ++p1) + { TBTables.add({KING, p1, KING}); - for (PieceType p2 = PAWN; p2 <= p1; ++p2) { + for (PieceType p2 = PAWN; p2 <= p1; ++p2) + { TBTables.add({KING, p1, p2, KING}); TBTables.add({KING, p1, KING, p2}); for (PieceType p3 = PAWN; p3 < KING; ++p3) TBTables.add({KING, p1, p2, KING, p3}); - for (PieceType p3 = PAWN; p3 <= p2; ++p3) { + for (PieceType p3 = PAWN; p3 <= p2; ++p3) + { TBTables.add({KING, p1, p2, p3, KING}); - for (PieceType p4 = PAWN; p4 <= p3; ++p4) { + for (PieceType p4 = PAWN; p4 <= p3; ++p4) + { TBTables.add({KING, p1, p2, p3, p4, KING}); for (PieceType p5 = PAWN; p5 <= p4; ++p5) @@ -1388,7 +1451,8 @@ void Tablebases::init(const std::string& paths) { TBTables.add({KING, p1, p2, p3, p4, KING, p5}); } - for (PieceType p4 = PAWN; p4 < KING; ++p4) { + for (PieceType p4 = PAWN; p4 < KING; ++p4) + { TBTables.add({KING, p1, p2, p3, KING, p4}); for (PieceType p5 = PAWN; p5 <= p4; ++p5) @@ -1447,13 +1511,13 @@ WDLScore Tablebases::probe_wdl(Position& pos, ProbeState* result) { // then do not accept moves leading to dtz + 50-move-counter == 100. int Tablebases::probe_dtz(Position& pos, ProbeState* result) { - *result = OK; + *result = OK; WDLScore wdl = search(pos, result); - if (*result == FAIL || wdl == WDLDraw) // DTZ tables don't store draws + if (*result == FAIL || wdl == WDLDraw) // DTZ tables don't store draws return 0; - // DTZ stores a 'don't care' value in this case, or even a plain wrong + // DTZ stores a 'don't care value in this case, or even a plain wrong // one as in case the best move is a losing ep, so it cannot be probed. if (*result == ZEROING_BEST_MOVE) return dtz_before_zeroing(wdl); @@ -1469,7 +1533,7 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) { // DTZ stores results for the other side, so we need to do a 1-ply search and // find the winning move that minimizes DTZ. StateInfo st; - int minDTZ = 0xFFFF; + int minDTZ = 0xFFFF; for (const Move move : MoveList(pos)) { @@ -1480,9 +1544,8 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) { // For zeroing moves we want the dtz of the move _before_ doing it, // otherwise we will get the dtz of the next move sequence. Search the // position after the move to get the score sign (because even in a - // winning position we could make a losing capture or going for a draw). - dtz = zeroing ? -dtz_before_zeroing(search(pos, result)) - : -probe_dtz(pos, result); + // winning position we could make a losing capture or go for a draw). + dtz = zeroing ? -dtz_before_zeroing(search(pos, result)) : -probe_dtz(pos, result); // If the move mates, force minDTZ to 1 if (dtz == 1 && pos.checkers() && MoveList(pos).size() == 0) @@ -1511,10 +1574,10 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) { // Use the DTZ tables to rank root moves. // // A return value false indicates that not all probes were successful. -bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { +bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50) { - ProbeState result; - StateInfo st; + ProbeState result = OK; + StateInfo st; // Obtain 50-move counter for the root position int cnt50 = pos.rule50_count(); @@ -1522,7 +1585,7 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { // Check whether a position was repeated since the last zeroing move. bool rep = pos.has_repeated(); - int dtz, bound = Options["Syzygy50MoveRule"] ? 900 : 1; + int dtz, bound = rule50 ? (MAX_DTZ - 100) : 1; // Probe and rank each move for (auto& m : rootMoves) @@ -1534,28 +1597,24 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { { // In case of a zeroing move, dtz is one of -101/-1/0/1/101 WDLScore wdl = -probe_wdl(pos, &result); - dtz = dtz_before_zeroing(wdl); + dtz = dtz_before_zeroing(wdl); } else if (pos.is_draw(1)) { - // In case a root move leads to a draw by repetition or - // 50-move rule, we set dtz to zero. Note: since we are - // only 1 ply from the root, this must be a true 3-fold - // repetition inside the game history. + // In case a root move leads to a draw by repetition or 50-move rule, + // we set dtz to zero. Note: since we are only 1 ply from the root, + // this must be a true 3-fold repetition inside the game history. dtz = 0; } else { // Otherwise, take dtz for the new position and correct by 1 ply dtz = -probe_dtz(pos, &result); - dtz = dtz > 0 ? dtz + 1 - : dtz < 0 ? dtz - 1 : dtz; + dtz = dtz > 0 ? dtz + 1 : dtz < 0 ? dtz - 1 : dtz; } // Make sure that a mating move is assigned a dtz value of 1 - if ( pos.checkers() - && dtz == 2 - && MoveList(pos).size() == 0) + if (pos.checkers() && dtz == 2 && MoveList(pos).size() == 0) dtz = 1; pos.undo_move(m.pv[0]); @@ -1565,19 +1624,19 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { // Better moves are ranked higher. Certain wins are ranked equally. // Losing moves are ranked equally unless a 50-move draw is in sight. - int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? 1000 : 1000 - (dtz + cnt50)) - : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -1000 : -1000 + (-dtz + cnt50)) - : 0; + int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ : MAX_DTZ - (dtz + cnt50)) + : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ : -MAX_DTZ + (-dtz + cnt50)) + : 0; m.tbRank = r; // Determine the score to be displayed for this move. Assign at least // 1 cp to cursed wins and let it grow to 49 cp as the positions gets // closer to a real win. - m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1 - : r > 0 ? Value((std::max( 3, r - 800) * int(PawnValueEg)) / 200) - : r == 0 ? VALUE_DRAW - : r > -bound ? Value((std::min(-3, r + 800) * int(PawnValueEg)) / 200) - : -VALUE_MATE + MAX_PLY + 1; + m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1 + : r > 0 ? Value((std::max(3, r - (MAX_DTZ - 200)) * int(PawnValue)) / 200) + : r == 0 ? VALUE_DRAW + : r > -bound ? Value((std::min(-3, r + (MAX_DTZ - 200)) * int(PawnValue)) / 200) + : -VALUE_MATE + MAX_PLY + 1; } return true; @@ -1588,15 +1647,14 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { // This is a fallback for the case that some or all DTZ tables are missing. // // A return value false indicates that not all probes were successful. -bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { +bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50) { - static const int WDL_to_rank[] = { -1000, -899, 0, 899, 1000 }; + static const int WDL_to_rank[] = {-MAX_DTZ, -MAX_DTZ + 101, 0, MAX_DTZ - 101, MAX_DTZ}; - ProbeState result; - StateInfo st; - WDLScore wdl; + ProbeState result = OK; + StateInfo st; + WDLScore wdl; - bool rule50 = Options["Syzygy50MoveRule"]; // Probe and rank each move for (auto& m : rootMoves) @@ -1616,12 +1674,67 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { m.tbRank = WDL_to_rank[wdl + 2]; if (!rule50) - wdl = wdl > WDLDraw ? WDLWin - : wdl < WDLDraw ? WDLLoss : WDLDraw; + wdl = wdl > WDLDraw ? WDLWin : wdl < WDLDraw ? WDLLoss : WDLDraw; m.tbScore = WDL_to_value[wdl + 2]; } return true; } -} // namespace Stockfish +Config Tablebases::rank_root_moves(const OptionsMap& options, + Position& pos, + Search::RootMoves& rootMoves) { + Config config; + + if (rootMoves.empty()) + return config; + + config.rootInTB = false; + config.useRule50 = bool(options["Syzygy50MoveRule"]); + config.probeDepth = int(options["SyzygyProbeDepth"]); + config.cardinality = int(options["SyzygyProbeLimit"]); + + bool dtz_available = true; + + // Tables with fewer pieces than SyzygyProbeLimit are searched with + // probeDepth == DEPTH_ZERO + if (config.cardinality > MaxCardinality) + { + config.cardinality = MaxCardinality; + config.probeDepth = 0; + } + + if (config.cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) + { + // Rank moves using DTZ tables + config.rootInTB = root_probe(pos, rootMoves, options["Syzygy50MoveRule"]); + + if (!config.rootInTB) + { + // DTZ tables are missing; try to rank moves using WDL tables + dtz_available = false; + config.rootInTB = root_probe_wdl(pos, rootMoves, options["Syzygy50MoveRule"]); + } + } + + if (config.rootInTB) + { + // Sort moves according to TB rank + std::stable_sort( + rootMoves.begin(), rootMoves.end(), + [](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; }); + + // Probe during search only if DTZ is not available and we are winning + if (dtz_available || rootMoves[0].tbScore <= VALUE_DRAW) + config.cardinality = 0; + } + else + { + // Clean up if root_probe() and root_probe_wdl() have failed + for (auto& m : rootMoves) + m.tbRank = 0; + } + + return config; +} +} // namespace Stockfish diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h index c2917fef..e10950f4 100644 --- a/src/syzygy/tbprobe.h +++ b/src/syzygy/tbprobe.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,60 +19,57 @@ #ifndef TBPROBE_H #define TBPROBE_H -#include +#include +#include -#include "../search.h" + +namespace Stockfish { +class Position; +class OptionsMap; + +using Depth = int; + +namespace Search { +struct RootMove; +using RootMoves = std::vector; +} +} namespace Stockfish::Tablebases { -enum WDLScore { - WDLLoss = -2, // Loss - WDLBlessedLoss = -1, // Loss, but draw under 50-move rule - WDLDraw = 0, // Draw - WDLCursedWin = 1, // Win, but draw under 50-move rule - WDLWin = 2, // Win +struct Config { + int cardinality = 0; + bool rootInTB = false; + bool useRule50 = false; + Depth probeDepth = 0; +}; - WDLScoreNone = -1000 +enum WDLScore { + WDLLoss = -2, // Loss + WDLBlessedLoss = -1, // Loss, but draw under 50-move rule + WDLDraw = 0, // Draw + WDLCursedWin = 1, // Win, but draw under 50-move rule + WDLWin = 2, // Win }; // Possible states after a probing operation enum ProbeState { - FAIL = 0, // Probe failed (missing file table) - OK = 1, // Probe successful - CHANGE_STM = -1, // DTZ should check the other side - ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) + FAIL = 0, // Probe failed (missing file table) + OK = 1, // Probe successful + CHANGE_STM = -1, // DTZ should check the other side + ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) }; extern int MaxCardinality; -void init(const std::string& paths); + +void init(const std::string& paths); WDLScore probe_wdl(Position& pos, ProbeState* result); -int probe_dtz(Position& pos, ProbeState* result); -bool root_probe(Position& pos, Search::RootMoves& rootMoves); -bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves); -void rank_root_moves(Position& pos, Search::RootMoves& rootMoves); +int probe_dtz(Position& pos, ProbeState* result); +bool root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50); +bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50); +Config rank_root_moves(const OptionsMap& options, Position& pos, Search::RootMoves& rootMoves); -inline std::ostream& operator<<(std::ostream& os, const WDLScore v) { - - os << (v == WDLLoss ? "Loss" : - v == WDLBlessedLoss ? "Blessed loss" : - v == WDLDraw ? "Draw" : - v == WDLCursedWin ? "Cursed win" : - v == WDLWin ? "Win" : "None"); - - return os; -} - -inline std::ostream& operator<<(std::ostream& os, const ProbeState v) { - - os << (v == FAIL ? "Failed" : - v == OK ? "Success" : - v == CHANGE_STM ? "Probed opponent side" : - v == ZEROING_BEST_MOVE ? "Best move zeroes DTZ" : "None"); - - return os; -} - -} // namespace Stockfish::Tablebases +} // namespace Stockfish::Tablebases #endif diff --git a/src/thread.cpp b/src/thread.cpp index 30177a39..9052654b 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,227 +16,280 @@ along with this program. If not, see . */ -#include +#include "thread.h" -#include // For std::count +#include +#include +#include +#include +#include +#include +#include + +#include "misc.h" #include "movegen.h" #include "search.h" -#include "thread.h" -#include "uci.h" #include "syzygy/tbprobe.h" +#include "timeman.h" #include "tt.h" +#include "types.h" +#include "ucioption.h" +#include "uci.h" namespace Stockfish { -ThreadPool Threads; // Global object +// Constructor launches the thread and waits until it goes to sleep +// in idle_loop(). Note that 'searching' and 'exit' should be already set. +Thread::Thread(Search::SharedState& sharedState, + std::unique_ptr sm, + size_t n) : + worker(std::make_unique(sharedState, std::move(sm), n)), + idx(n), + nthreads(sharedState.options["Threads"]), + stdThread(&Thread::idle_loop, this) { - -/// Thread constructor launches the thread and waits until it goes to sleep -/// in idle_loop(). Note that 'searching' and 'exit' should be already set. - -Thread::Thread(size_t n) : idx(n), stdThread(&Thread::idle_loop, this) { - - wait_for_search_finished(); + wait_for_search_finished(); } -/// Thread destructor wakes up the thread in idle_loop() and waits -/// for its termination. Thread should be already waiting. - +// Destructor wakes up the thread in idle_loop() and waits +// for its termination. Thread should be already waiting. Thread::~Thread() { - assert(!searching); + assert(!searching); - exit = true; - start_searching(); - stdThread.join(); + exit = true; + start_searching(); + stdThread.join(); } -/// Thread::clear() reset histories, usually before a new game - -void Thread::clear() { - - counterMoves.fill(MOVE_NONE); - mainHistory.fill(0); - captureHistory.fill(0); - - for (bool inCheck : { false, true }) - for (StatsType c : { NoCaptures, Captures }) - { - for (auto& to : continuationHistory[inCheck][c]) - for (auto& h : to) - h->fill(-71); - continuationHistory[inCheck][c][NO_PIECE][0]->fill(Search::CounterMovePruneThreshold - 1); - } -} - - -/// Thread::start_searching() wakes up the thread that will start the search - +// Wakes up the thread that will start the search void Thread::start_searching() { - - std::lock_guard lk(mutex); - searching = true; - cv.notify_one(); // Wake up the thread in idle_loop() + mutex.lock(); + searching = true; + mutex.unlock(); // Unlock before notifying saves a few CPU-cycles + cv.notify_one(); // Wake up the thread in idle_loop() } -/// Thread::wait_for_search_finished() blocks on the condition variable -/// until the thread has finished searching. - +// Blocks on the condition variable +// until the thread has finished searching. void Thread::wait_for_search_finished() { - std::unique_lock lk(mutex); - cv.wait(lk, [&]{ return !searching; }); + std::unique_lock lk(mutex); + cv.wait(lk, [&] { return !searching; }); } -/// Thread::idle_loop() is where the thread is parked, blocked on the -/// condition variable, when it has no work to do. +// Thread gets parked here, blocked on the +// condition variable, when it has no work to do. void Thread::idle_loop() { - // If OS already scheduled us on a different group than 0 then don't overwrite - // the choice, eventually we are one of many one-threaded processes running on - // some Windows NUMA hardware, for instance in fishtest. To make it simple, - // just check if running threads are below a threshold, in this case all this - // NUMA machinery is not needed. - if (Options["Threads"] > 8) - WinProcGroup::bindThisThread(idx); + // If OS already scheduled us on a different group than 0 then don't overwrite + // the choice, eventually we are one of many one-threaded processes running on + // some Windows NUMA hardware, for instance in fishtest. To make it simple, + // just check if running threads are below a threshold, in this case, all this + // NUMA machinery is not needed. + if (nthreads > 8) + WinProcGroup::bind_this_thread(idx); - while (true) - { - std::unique_lock lk(mutex); - searching = false; - cv.notify_one(); // Wake up anyone waiting for search finished - cv.wait(lk, [&]{ return searching; }); + while (true) + { + std::unique_lock lk(mutex); + searching = false; + cv.notify_one(); // Wake up anyone waiting for search finished + cv.wait(lk, [&] { return searching; }); - if (exit) - return; + if (exit) + return; - lk.unlock(); + lk.unlock(); - search(); - } + worker->start_searching(); + } } -/// ThreadPool::set() creates/destroys threads to match the requested number. -/// Created and launched threads will immediately go to sleep in idle_loop. -/// Upon resizing, threads are recreated to allow for binding if necessary. +Search::SearchManager* ThreadPool::main_manager() { + return static_cast(main_thread()->worker.get()->manager.get()); +} -void ThreadPool::set(size_t requested) { +uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); } +uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); } - if (size() > 0) // destroy any existing thread(s) - { - main()->wait_for_search_finished(); +// Creates/destroys threads to match the requested number. +// Created and launched threads will immediately go to sleep in idle_loop. +// Upon resizing, threads are recreated to allow for binding if necessary. +void ThreadPool::set(Search::SharedState sharedState, + const Search::SearchManager::UpdateContext& updateContext) { - while (size() > 0) - delete back(), pop_back(); - } + if (threads.size() > 0) // destroy any existing thread(s) + { + main_thread()->wait_for_search_finished(); - if (requested > 0) // create new thread(s) - { - push_back(new MainThread(0)); + while (threads.size() > 0) + delete threads.back(), threads.pop_back(); + } - while (size() < requested) - push_back(new Thread(size())); - clear(); + const size_t requested = sharedState.options["Threads"]; - // Reallocate the hash with the new threadpool size - TT.resize(size_t(Options["Hash"])); + if (requested > 0) // create new thread(s) + { + auto manager = std::make_unique(updateContext); + threads.push_back(new Thread(sharedState, std::move(manager), 0)); - // Init thread number dependent search params. - Search::init(); - } + while (threads.size() < requested) + { + auto null_manager = std::make_unique(); + threads.push_back(new Thread(sharedState, std::move(null_manager), threads.size())); + } + + clear(); + + main_thread()->wait_for_search_finished(); + + // Reallocate the hash with the new threadpool size + sharedState.tt.resize(sharedState.options["Hash"], requested); + } } -/// ThreadPool::clear() sets threadPool data to initial values - +// Sets threadPool data to initial values void ThreadPool::clear() { - for (Thread* th : *this) - th->clear(); + for (Thread* th : threads) + th->worker->clear(); - main()->callsCnt = 0; - main()->bestPreviousScore = VALUE_INFINITE; - main()->bestPreviousAverageScore = VALUE_INFINITE; - main()->previousTimeReduction = 1.0; + if (threads.size() == 0) + return; + + main_manager()->callsCnt = 0; + main_manager()->bestPreviousScore = VALUE_INFINITE; + main_manager()->bestPreviousAverageScore = VALUE_INFINITE; + main_manager()->previousTimeReduction = 1.0; + main_manager()->tm.clear(); } -/// ThreadPool::start_thinking() wakes up main thread waiting in idle_loop() and -/// returns immediately. Main thread will wake up other threads and start the search. +// Wakes up main thread waiting in idle_loop() and +// returns immediately. Main thread will wake up other threads and start the search. +void ThreadPool::start_thinking(const OptionsMap& options, + Position& pos, + StateListPtr& states, + Search::LimitsType limits) { -void ThreadPool::start_thinking(Position& pos, StateListPtr& states, - const Search::LimitsType& limits, bool ponderMode) { + main_thread()->wait_for_search_finished(); - main()->wait_for_search_finished(); + main_manager()->stopOnPonderhit = stop = abortedSearch = false; + main_manager()->ponder = limits.ponderMode; - main()->stopOnPonderhit = stop = false; - increaseDepth = true; - main()->ponder = ponderMode; - Search::Limits = limits; - Search::RootMoves rootMoves; + increaseDepth = true; - for (const auto& m : MoveList(pos)) - if ( limits.searchmoves.empty() - || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m)) - rootMoves.emplace_back(m); + Search::RootMoves rootMoves; + const auto legalmoves = MoveList(pos); - if (!rootMoves.empty()) - Tablebases::rank_root_moves(pos, rootMoves); + for (const auto& uciMove : limits.searchmoves) + { + auto move = UCIEngine::to_move(pos, uciMove); - // After ownership transfer 'states' becomes empty, so if we stop the search - // and call 'go' again without setting a new position states.get() == NULL. - assert(states.get() || setupStates.get()); + if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end()) + rootMoves.emplace_back(move); + } - if (states.get()) - setupStates = std::move(states); // Ownership transfer, states is now empty + if (rootMoves.empty()) + for (const auto& m : legalmoves) + rootMoves.emplace_back(m); - // We use Position::set() to set root position across threads. But there are - // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot - // be deduced from a fen string, so set() clears them and they are set from - // setupStates->back() later. The rootState is per thread, earlier states are shared - // since they are read-only. - for (Thread* th : *this) - { - th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0; - th->rootDepth = th->completedDepth = 0; - th->rootMoves = rootMoves; - th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th); - th->rootState = setupStates->back(); - } + Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves); - main()->start_searching(); + // After ownership transfer 'states' becomes empty, so if we stop the search + // and call 'go' again without setting a new position states.get() == nullptr. + assert(states.get() || setupStates.get()); + + if (states.get()) + setupStates = std::move(states); // Ownership transfer, states is now empty + + // We use Position::set() to set root position across threads. But there are + // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot + // be deduced from a fen string, so set() clears them and they are set from + // setupStates->back() later. The rootState is per thread, earlier states are shared + // since they are read-only. + for (Thread* th : threads) + { + th->worker->limits = limits; + th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly = + th->worker->bestMoveChanges = 0; + th->worker->rootDepth = th->worker->completedDepth = 0; + th->worker->rootMoves = rootMoves; + th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState); + th->worker->rootState = setupStates->back(); + th->worker->tbConfig = tbConfig; + } + + main_thread()->start_searching(); } Thread* ThreadPool::get_best_thread() const { - Thread* bestThread = front(); - std::map votes; - Value minScore = VALUE_NONE; + Thread* bestThread = threads.front(); + Value minScore = VALUE_NONE; - // Find minimum score of all threads - for (Thread* th: *this) - minScore = std::min(minScore, th->rootMoves[0].score); + std::unordered_map votes( + 2 * std::min(size(), bestThread->worker->rootMoves.size())); + + // Find the minimum score of all threads + for (Thread* th : threads) + minScore = std::min(minScore, th->worker->rootMoves[0].score); // Vote according to score and depth, and select the best thread - for (Thread* th : *this) - { - votes[th->rootMoves[0].pv[0]] += - (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth); + auto thread_voting_value = [minScore](Thread* th) { + return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth); + }; - if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) + for (Thread* th : threads) + votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th); + + for (Thread* th : threads) + { + const auto bestThreadScore = bestThread->worker->rootMoves[0].score; + const auto newThreadScore = th->worker->rootMoves[0].score; + + const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv; + const auto& newThreadPV = th->worker->rootMoves[0].pv; + + const auto bestThreadMoveVote = votes[bestThreadPV[0]]; + const auto newThreadMoveVote = votes[newThreadPV[0]]; + + const bool bestThreadInProvenWin = bestThreadScore >= VALUE_TB_WIN_IN_MAX_PLY; + const bool newThreadInProvenWin = newThreadScore >= VALUE_TB_WIN_IN_MAX_PLY; + + const bool bestThreadInProvenLoss = + bestThreadScore != -VALUE_INFINITE && bestThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY; + const bool newThreadInProvenLoss = + newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY; + + // Note that we make sure not to pick a thread with truncated-PV for better viewer experience. + const bool betterVotingValue = + thread_voting_value(th) * int(newThreadPV.size() > 2) + > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2); + + if (bestThreadInProvenWin) { - // Make sure we pick the shortest mate / TB conversion or stave off mate the longest - if (th->rootMoves[0].score > bestThread->rootMoves[0].score) + // Make sure we pick the shortest mate / TB conversion + if (newThreadScore > bestThreadScore) bestThread = th; } - else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY - || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY - && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]])) + else if (bestThreadInProvenLoss) + { + // Make sure we pick the shortest mated / TB conversion + if (newThreadInProvenLoss && newThreadScore < bestThreadScore) + bestThread = th; + } + else if (newThreadInProvenWin || newThreadInProvenLoss + || (newThreadScore > VALUE_TB_LOSS_IN_MAX_PLY + && (newThreadMoveVote > bestThreadMoveVote + || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue)))) bestThread = th; } @@ -244,23 +297,23 @@ Thread* ThreadPool::get_best_thread() const { } -/// Start non-main threads - +// Start non-main threads +// Will be invoked by main thread after it has started searching void ThreadPool::start_searching() { - for (Thread* th : *this) - if (th != front()) + for (Thread* th : threads) + if (th != threads.front()) th->start_searching(); } -/// Wait for non-main threads +// Wait for non-main threads void ThreadPool::wait_for_search_finished() const { - for (Thread* th : *this) - if (th != front()) + for (Thread* th : threads) + if (th != threads.front()) th->wait_for_search_finished(); } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/thread.h b/src/thread.h index 594a8ea2..223652ae 100644 --- a/src/thread.h +++ b/src/thread.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,116 +21,99 @@ #include #include +#include +#include +#include #include -#include #include -#include "material.h" -#include "movepick.h" -#include "pawns.h" #include "position.h" #include "search.h" #include "thread_win32_osx.h" namespace Stockfish { -/// Thread class keeps together all the thread-related stuff. We use -/// per-thread pawn and material hash tables so that once we get a -/// pointer to an entry its life time is unlimited and we don't have -/// to care about someone changing the entry under our feet. +class OptionsMap; +using Value = int; + +// Abstraction of a thread. It contains a pointer to the worker and a native thread. +// After construction, the native thread is started with idle_loop() +// waiting for a signal to start searching. +// When the signal is received, the thread starts searching and when +// the search is finished, it goes back to idle_loop() waiting for a new signal. class Thread { + public: + Thread(Search::SharedState&, std::unique_ptr, size_t); + virtual ~Thread(); - std::mutex mutex; - std::condition_variable cv; - size_t idx; - bool exit = false, searching = true; // Set before starting std::thread - NativeThread stdThread; + void idle_loop(); + void start_searching(); + void wait_for_search_finished(); + size_t id() const { return idx; } -public: - explicit Thread(size_t); - virtual ~Thread(); - virtual void search(); - void clear(); - void idle_loop(); - void start_searching(); - void wait_for_search_finished(); - size_t id() const { return idx; } + std::unique_ptr worker; - Pawns::Table pawnsTable; - Material::Table materialTable; - size_t pvIdx, pvLast; - RunningAverage complexityAverage; - std::atomic nodes, tbHits, bestMoveChanges; - int selDepth, nmpMinPly; - Color nmpColor; - Value bestValue, optimism[COLOR_NB]; - - Position rootPos; - StateInfo rootState; - Search::RootMoves rootMoves; - Depth rootDepth, completedDepth; - Value rootDelta; - CounterMoveHistory counterMoves; - ButterflyHistory mainHistory; - CapturePieceToHistory captureHistory; - ContinuationHistory continuationHistory[2][2]; - Score trend; + private: + std::mutex mutex; + std::condition_variable cv; + size_t idx, nthreads; + bool exit = false, searching = true; // Set before starting std::thread + NativeThread stdThread; }; -/// MainThread is a derived class specific for main thread +// ThreadPool struct handles all the threads-related stuff like init, starting, +// parking and, most importantly, launching a thread. All the access to threads +// is done through this class. +class ThreadPool { -struct MainThread : public Thread { + public: + ~ThreadPool() { + // destroy any existing thread(s) + if (threads.size() > 0) + { + main_thread()->wait_for_search_finished(); - using Thread::Thread; + while (threads.size() > 0) + delete threads.back(), threads.pop_back(); + } + } - void search() override; - void check_time(); + void start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType); + void clear(); + void set(Search::SharedState, const Search::SearchManager::UpdateContext&); - double previousTimeReduction; - Value bestPreviousScore; - Value bestPreviousAverageScore; - Value iterValue[4]; - int callsCnt; - bool stopOnPonderhit; - std::atomic_bool ponder; + Search::SearchManager* main_manager(); + Thread* main_thread() const { return threads.front(); } + uint64_t nodes_searched() const; + uint64_t tb_hits() const; + Thread* get_best_thread() const; + void start_searching(); + void wait_for_search_finished() const; + + std::atomic_bool stop, abortedSearch, increaseDepth; + + auto cbegin() const noexcept { return threads.cbegin(); } + auto begin() noexcept { return threads.begin(); } + auto end() noexcept { return threads.end(); } + auto cend() const noexcept { return threads.cend(); } + auto size() const noexcept { return threads.size(); } + auto empty() const noexcept { return threads.empty(); } + + private: + StateListPtr setupStates; + std::vector threads; + + uint64_t accumulate(std::atomic Search::Worker::*member) const { + + uint64_t sum = 0; + for (Thread* th : threads) + sum += (th->worker.get()->*member).load(std::memory_order_relaxed); + return sum; + } }; +} // namespace Stockfish -/// ThreadPool struct handles all the threads-related stuff like init, starting, -/// parking and, most importantly, launching a thread. All the access to threads -/// is done through this class. - -struct ThreadPool : public std::vector { - - void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false); - void clear(); - void set(size_t); - - MainThread* main() const { return static_cast(front()); } - uint64_t nodes_searched() const { return accumulate(&Thread::nodes); } - uint64_t tb_hits() const { return accumulate(&Thread::tbHits); } - Thread* get_best_thread() const; - void start_searching(); - void wait_for_search_finished() const; - - std::atomic_bool stop, increaseDepth; - -private: - StateListPtr setupStates; - - uint64_t accumulate(std::atomic Thread::* member) const { - - uint64_t sum = 0; - for (Thread* th : *this) - sum += (th->*member).load(std::memory_order_relaxed); - return sum; - } -}; - -extern ThreadPool Threads; - -} // namespace Stockfish - -#endif // #ifndef THREAD_H_INCLUDED +#endif // #ifndef THREAD_H_INCLUDED diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h index 77d1c3c7..1d9a834f 100644 --- a/src/thread_win32_osx.h +++ b/src/thread_win32_osx.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,54 +21,58 @@ #include -/// On OSX threads other than the main thread are created with a reduced stack -/// size of 512KB by default, this is too low for deep searches, which require -/// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE. -/// The implementation calls pthread_create() with the stack size parameter -/// equal to the linux 8MB default, on platforms that support it. +// On OSX threads other than the main thread are created with a reduced stack +// size of 512KB by default, this is too low for deep searches, which require +// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE. +// The implementation calls pthread_create() with the stack size parameter +// equal to the Linux 8MB default, on platforms that support it. #if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS) -#include + #include + #include namespace Stockfish { -static const size_t TH_STACK_SIZE = 8 * 1024 * 1024; - -template > -void* start_routine(void* ptr) -{ - P* p = reinterpret_cast(ptr); - (p->first->*(p->second))(); // Call member function pointer - delete p; - return NULL; -} - class NativeThread { + pthread_t thread; - pthread_t thread; + static constexpr size_t TH_STACK_SIZE = 8 * 1024 * 1024; -public: - template> - explicit NativeThread(void(T::*fun)(), T* obj) { - pthread_attr_t attr_storage, *attr = &attr_storage; - pthread_attr_init(attr); - pthread_attr_setstacksize(attr, TH_STACK_SIZE); - pthread_create(&thread, attr, start_routine, new P(obj, fun)); - } - void join() { pthread_join(thread, NULL); } + public: + template + explicit NativeThread(Function&& fun, Args&&... args) { + auto func = new std::function( + std::bind(std::forward(fun), std::forward(args)...)); + + pthread_attr_t attr_storage, *attr = &attr_storage; + pthread_attr_init(attr); + pthread_attr_setstacksize(attr, TH_STACK_SIZE); + + auto start_routine = [](void* ptr) -> void* { + auto f = reinterpret_cast*>(ptr); + // Call the function + (*f)(); + delete f; + return nullptr; + }; + + pthread_create(&thread, attr, start_routine, func); + } + + void join() { pthread_join(thread, nullptr); } }; -} // namespace Stockfish +} // namespace Stockfish -#else // Default case: use STL classes +#else // Default case: use STL classes namespace Stockfish { -typedef std::thread NativeThread; +using NativeThread = std::thread; -} // namespace Stockfish +} // namespace Stockfish #endif -#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED +#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED diff --git a/src/timeman.cpp b/src/timeman.cpp index 0400401e..c651745f 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,90 +16,114 @@ along with this program. If not, see . */ +#include "timeman.h" + #include -#include +#include #include +#include #include "search.h" -#include "timeman.h" -#include "uci.h" +#include "ucioption.h" namespace Stockfish { -TimeManagement Time; // Our global time management object +TimePoint TimeManagement::optimum() const { return optimumTime; } +TimePoint TimeManagement::maximum() const { return maximumTime; } - -/// TimeManagement::init() is called at the beginning of the search and calculates -/// the bounds of time allowed for the current game ply. We currently support: -// 1) x basetime (+ z increment) -// 2) x moves in y seconds (+ z increment) - -void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { - - TimePoint moveOverhead = TimePoint(Options["Move Overhead"]); - TimePoint slowMover = TimePoint(Options["Slow Mover"]); - TimePoint npmsec = TimePoint(Options["nodestime"]); - - // optScale is a percentage of available time to use for the current move. - // maxScale is a multiplier applied to optimumTime. - double optScale, maxScale; - - // If we have to play in 'nodes as time' mode, then convert from time - // to nodes, and use resulting values in time management formulas. - // WARNING: to avoid time losses, the given npmsec (nodes per millisecond) - // must be much lower than the real engine speed. - if (npmsec) - { - if (!availableNodes) // Only once at game start - availableNodes = npmsec * limits.time[us]; // Time is in msec - - // Convert from milliseconds to nodes - limits.time[us] = TimePoint(availableNodes); - limits.inc[us] *= npmsec; - limits.npmsec = npmsec; - } - - startTime = limits.startTime; - - // Maximum move horizon of 50 moves - int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50; - - // Make sure timeLeft is > 0 since we may use it as a divisor - TimePoint timeLeft = std::max(TimePoint(1), - limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg)); - - // Use extra time with larger increments - double optExtra = std::clamp(1.0 + 12.0 * limits.inc[us] / limits.time[us], 1.0, 1.12); - - // A user may scale time usage by setting UCI option "Slow Mover" - // Default is 100 and changing this value will probably lose elo. - timeLeft = slowMover * timeLeft / 100; - - // x basetime (+ z increment) - // If there is a healthy increment, timeLeft can exceed actual available - // game time for the current move, so also cap to 20% of available game time. - if (limits.movestogo == 0) - { - optScale = std::min(0.0084 + std::pow(ply + 3.0, 0.5) * 0.0042, - 0.2 * limits.time[us] / double(timeLeft)) - * optExtra; - maxScale = std::min(7.0, 4.0 + ply / 12.0); - } - - // x moves in y seconds (+ z increment) - else - { - optScale = std::min((0.88 + ply / 116.4) / mtg, - 0.88 * limits.time[us] / double(timeLeft)); - maxScale = std::min(6.3, 1.5 + 0.11 * mtg); - } - - // Never use more than 80% of the available time for this move - optimumTime = TimePoint(optScale * timeLeft); - maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime)); - - if (Options["Ponder"]) - optimumTime += optimumTime / 4; +void TimeManagement::clear() { + availableNodes = 0; // When in 'nodes as time' mode } -} // namespace Stockfish +void TimeManagement::advance_nodes_time(std::int64_t nodes) { + assert(useNodesTime); + availableNodes += nodes; +} + +// Called at the beginning of the search and calculates +// the bounds of time allowed for the current game ply. We currently support: +// 1) x basetime (+ z increment) +// 2) x moves in y seconds (+ z increment) +void TimeManagement::init(Search::LimitsType& limits, + Color us, + int ply, + const OptionsMap& options) { + // If we have no time, no need to initialize TM, except for the start time, + // which is used by movetime. + startTime = limits.startTime; + if (limits.time[us] == 0) + return; + + TimePoint moveOverhead = TimePoint(options["Move Overhead"]); + TimePoint npmsec = TimePoint(options["nodestime"]); + + // optScale is a percentage of available time to use for the current move. + // maxScale is a multiplier applied to optimumTime. + double optScale, maxScale; + + // If we have to play in 'nodes as time' mode, then convert from time + // to nodes, and use resulting values in time management formulas. + // WARNING: to avoid time losses, the given npmsec (nodes per millisecond) + // must be much lower than the real engine speed. + if (npmsec) + { + useNodesTime = true; + + if (!availableNodes) // Only once at game start + availableNodes = npmsec * limits.time[us]; // Time is in msec + + // Convert from milliseconds to nodes + limits.time[us] = TimePoint(availableNodes); + limits.inc[us] *= npmsec; + limits.npmsec = npmsec; + } + + // Maximum move horizon of 50 moves + int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50; + + // if less than one second, gradually reduce mtg + if (limits.time[us] < 1000 && (double(mtg) / limits.time[us] > 0.05)) + { + mtg = limits.time[us] * 0.05; + } + + // Make sure timeLeft is > 0 since we may use it as a divisor + TimePoint timeLeft = std::max(TimePoint(1), limits.time[us] + limits.inc[us] * (mtg - 1) + - moveOverhead * (2 + mtg)); + + // x basetime (+ z increment) + // If there is a healthy increment, timeLeft can exceed the actual available + // game time for the current move, so also cap to a percentage of available game time. + if (limits.movestogo == 0) + { + // Use extra time with larger increments + double optExtra = limits.inc[us] < 500 ? 1.0 : 1.13; + + // Calculate time constants based on current time left. + double optConstant = + std::min(0.00308 + 0.000319 * std::log10(limits.time[us] / 1000.0), 0.00506); + double maxConstant = std::max(3.39 + 3.01 * std::log10(limits.time[us] / 1000.0), 2.93); + + optScale = std::min(0.0122 + std::pow(ply + 2.95, 0.462) * optConstant, + 0.213 * limits.time[us] / double(timeLeft)) + * optExtra; + maxScale = std::min(6.64, maxConstant + ply / 12.0); + } + + // x moves in y seconds (+ z increment) + else + { + optScale = std::min((0.88 + ply / 116.4) / mtg, 0.88 * limits.time[us] / double(timeLeft)); + maxScale = std::min(6.3, 1.5 + 0.11 * mtg); + } + + // Limit the maximum possible time for this move + optimumTime = TimePoint(optScale * timeLeft); + maximumTime = + TimePoint(std::min(0.825 * limits.time[us] - moveOverhead, maxScale * optimumTime)) - 10; + + if (options["Ponder"]) + optimumTime += optimumTime / 4; +} + +} // namespace Stockfish diff --git a/src/timeman.h b/src/timeman.h index a86f0769..35c3cfc0 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,33 +19,44 @@ #ifndef TIMEMAN_H_INCLUDED #define TIMEMAN_H_INCLUDED +#include + #include "misc.h" -#include "search.h" -#include "thread.h" +#include "types.h" namespace Stockfish { -/// The TimeManagement class computes the optimal time to think depending on -/// the maximum available time, the game move number and other parameters. +class OptionsMap; +namespace Search { +struct LimitsType; +} + +// The TimeManagement class computes the optimal time to think depending on +// the maximum available time, the game move number, and other parameters. class TimeManagement { -public: - void init(Search::LimitsType& limits, Color us, int ply); - TimePoint optimum() const { return optimumTime; } - TimePoint maximum() const { return maximumTime; } - TimePoint elapsed() const { return Search::Limits.npmsec ? - TimePoint(Threads.nodes_searched()) : now() - startTime; } + public: + void init(Search::LimitsType& limits, Color us, int ply, const OptionsMap& options); - int64_t availableNodes; // When in 'nodes as time' mode + TimePoint optimum() const; + TimePoint maximum() const; + template + TimePoint elapsed(FUNC nodes) const { + return useNodesTime ? TimePoint(nodes()) : now() - startTime; + } -private: - TimePoint startTime; - TimePoint optimumTime; - TimePoint maximumTime; + void clear(); + void advance_nodes_time(std::int64_t nodes); + + private: + TimePoint startTime; + TimePoint optimumTime; + TimePoint maximumTime; + + std::int64_t availableNodes = 0; // When in 'nodes as time' mode + bool useNodesTime = false; // True if we are in 'nodes as time' mode }; -extern TimeManagement Time; +} // namespace Stockfish -} // namespace Stockfish - -#endif // #ifndef TIMEMAN_H_INCLUDED +#endif // #ifndef TIMEMAN_H_INCLUDED diff --git a/src/tt.cpp b/src/tt.cpp index c7118aea..4885a781 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,147 +16,139 @@ along with this program. If not, see . */ -#include // For std::memset +#include "tt.h" + +#include +#include +#include +#include #include #include +#include -#include "bitboard.h" #include "misc.h" -#include "thread.h" -#include "tt.h" -#include "uci.h" namespace Stockfish { -TranspositionTable TT; // Our global transposition table +// Populates the TTEntry with a new node's data, possibly +// overwriting an old position. The update is not atomic and can be racy. +void TTEntry::save( + Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) { -/// TTEntry::save() populates the TTEntry with a new node's data, possibly -/// overwriting an old position. Update is not atomic and can be racy. + // Preserve any existing move for the same position + if (m || uint16_t(k) != key16) + move16 = m; -void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) { + // Overwrite less valuable entries (cheapest checks first) + if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_OFFSET + 2 * pv > depth8 - 4 + || relative_age(generation8)) + { + assert(d > DEPTH_OFFSET); + assert(d < 256 + DEPTH_OFFSET); - // Preserve any existing move for the same position - if (m || (uint16_t)k != key16) - move16 = (uint16_t)m; - - // Overwrite less valuable entries (cheapest checks first) - if ( b == BOUND_EXACT - || (uint16_t)k != key16 - || d - DEPTH_OFFSET + 2 * pv > depth8 - 4) - { - assert(d > DEPTH_OFFSET); - assert(d < 256 + DEPTH_OFFSET); - - key16 = (uint16_t)k; - depth8 = (uint8_t)(d - DEPTH_OFFSET); - genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b); - value16 = (int16_t)v; - eval16 = (int16_t)ev; - } + key16 = uint16_t(k); + depth8 = uint8_t(d - DEPTH_OFFSET); + genBound8 = uint8_t(generation8 | uint8_t(pv) << 2 | b); + value16 = int16_t(v); + eval16 = int16_t(ev); + } } -/// TranspositionTable::resize() sets the size of the transposition table, -/// measured in megabytes. Transposition table consists of a power of 2 number -/// of clusters and each cluster consists of ClusterSize number of TTEntry. +uint8_t TTEntry::relative_age(const uint8_t generation8) const { + // Due to our packed storage format for generation and its cyclic + // nature we add GENERATION_CYCLE (256 is the modulus, plus what + // is needed to keep the unrelated lowest n bits from affecting + // the result) to calculate the entry age correctly even after + // generation8 overflows into the next cycle. -void TranspositionTable::resize(size_t mbSize) { - - Threads.main()->wait_for_search_finished(); - - aligned_large_pages_free(table); - - clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - - table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); - if (!table) - { - std::cerr << "Failed to allocate " << mbSize - << "MB for transposition table." << std::endl; - exit(EXIT_FAILURE); - } - - clear(); + return (TranspositionTable::GENERATION_CYCLE + generation8 - genBound8) + & TranspositionTable::GENERATION_MASK; } -/// TranspositionTable::clear() initializes the entire transposition table to zero, -// in a multi-threaded way. +// Sets the size of the transposition table, +// measured in megabytes. Transposition table consists +// of clusters and each cluster consists of ClusterSize number of TTEntry. +void TranspositionTable::resize(size_t mbSize, int threadCount) { + aligned_large_pages_free(table); -void TranspositionTable::clear() { + clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - std::vector threads; + table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); + if (!table) + { + std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl; + exit(EXIT_FAILURE); + } - for (size_t idx = 0; idx < Options["Threads"]; ++idx) - { - threads.emplace_back([this, idx]() { - - // Thread binding gives faster search on systems with a first-touch policy - if (Options["Threads"] > 8) - WinProcGroup::bindThisThread(idx); - - // Each thread will zero its part of the hash table - const size_t stride = size_t(clusterCount / Options["Threads"]), - start = size_t(stride * idx), - len = idx != Options["Threads"] - 1 ? - stride : clusterCount - start; - - std::memset(&table[start], 0, len * sizeof(Cluster)); - }); - } - - for (std::thread& th : threads) - th.join(); + clear(threadCount); } -/// TranspositionTable::probe() looks up the current position in the transposition -/// table. It returns true and a pointer to the TTEntry if the position is found. -/// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry -/// to be replaced later. The replace value of an entry is calculated as its depth -/// minus 8 times its relative age. TTEntry t1 is considered more valuable than -/// TTEntry t2 if its replace value is greater than that of t2. +// Initializes the entire transposition table to zero, +// in a multi-threaded way. +void TranspositionTable::clear(size_t threadCount) { + std::vector threads; + for (size_t idx = 0; idx < size_t(threadCount); ++idx) + { + threads.emplace_back([this, idx, threadCount]() { + // Thread binding gives faster search on systems with a first-touch policy + if (threadCount > 8) + WinProcGroup::bind_this_thread(idx); + + // Each thread will zero its part of the hash table + const size_t stride = size_t(clusterCount / threadCount), start = size_t(stride * idx), + len = idx != size_t(threadCount) - 1 ? stride : clusterCount - start; + + std::memset(&table[start], 0, len * sizeof(Cluster)); + }); + } + + for (std::thread& th : threads) + th.join(); +} + + +// Looks up the current position in the transposition +// table. It returns true and a pointer to the TTEntry if the position is found. +// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry +// to be replaced later. The replace value of an entry is calculated as its depth +// minus 8 times its relative age. TTEntry t1 is considered more valuable than +// TTEntry t2 if its replace value is greater than that of t2. TTEntry* TranspositionTable::probe(const Key key, bool& found) const { - TTEntry* const tte = first_entry(key); - const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster + TTEntry* const tte = first_entry(key); + const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster - for (int i = 0; i < ClusterSize; ++i) - if (tte[i].key16 == key16 || !tte[i].depth8) - { - tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & (GENERATION_DELTA - 1))); // Refresh + for (int i = 0; i < ClusterSize; ++i) + if (tte[i].key16 == key16 || !tte[i].depth8) + return found = bool(tte[i].depth8), &tte[i]; - return found = (bool)tte[i].depth8, &tte[i]; - } + // Find an entry to be replaced according to the replacement strategy + TTEntry* replace = tte; + for (int i = 1; i < ClusterSize; ++i) + if (replace->depth8 - replace->relative_age(generation8) * 2 + > tte[i].depth8 - tte[i].relative_age(generation8) * 2) + replace = &tte[i]; - // Find an entry to be replaced according to the replacement strategy - TTEntry* replace = tte; - for (int i = 1; i < ClusterSize; ++i) - // Due to our packed storage format for generation and its cyclic - // nature we add GENERATION_CYCLE (256 is the modulus, plus what - // is needed to keep the unrelated lowest n bits from affecting - // the result) to calculate the entry age correctly even after - // generation8 overflows into the next cycle. - if ( replace->depth8 - ((GENERATION_CYCLE + generation8 - replace->genBound8) & GENERATION_MASK) - > tte[i].depth8 - ((GENERATION_CYCLE + generation8 - tte[i].genBound8) & GENERATION_MASK)) - replace = &tte[i]; - - return found = false, replace; + return found = false, replace; } -/// TranspositionTable::hashfull() returns an approximation of the hashtable -/// occupation during a search. The hash is x permill full, as per UCI protocol. - +// Returns an approximation of the hashtable +// occupation during a search. The hash is x permill full, as per UCI protocol. +// Only counts entries which match the current generation. int TranspositionTable::hashfull() const { - int cnt = 0; - for (int i = 0; i < 1000; ++i) - for (int j = 0; j < ClusterSize; ++j) - cnt += table[i].entry[j].depth8 && (table[i].entry[j].genBound8 & GENERATION_MASK) == generation8; + int cnt = 0; + for (int i = 0; i < 1000; ++i) + for (int j = 0; j < ClusterSize; ++j) + cnt += table[i].entry[j].depth8 + && (table[i].entry[j].genBound8 & GENERATION_MASK) == generation8; - return cnt / ClusterSize; + return cnt / ClusterSize; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/tt.h b/src/tt.h index 03fe3e14..554a81a5 100644 --- a/src/tt.h +++ b/src/tt.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,89 +19,103 @@ #ifndef TT_H_INCLUDED #define TT_H_INCLUDED +#include +#include + #include "misc.h" #include "types.h" namespace Stockfish { -/// TTEntry struct is the 10 bytes transposition table entry, defined as below: -/// -/// key 16 bit -/// depth 8 bit -/// generation 5 bit -/// pv node 1 bit -/// bound type 2 bit -/// move 16 bit -/// value 16 bit -/// eval value 16 bit - +// TTEntry struct is the 10 bytes transposition table entry, defined as below: +// +// key 16 bit +// depth 8 bit +// generation 5 bit +// pv node 1 bit +// bound type 2 bit +// move 16 bit +// value 16 bit +// eval value 16 bit struct TTEntry { - Move move() const { return (Move )move16; } - Value value() const { return (Value)value16; } - Value eval() const { return (Value)eval16; } - Depth depth() const { return (Depth)depth8 + DEPTH_OFFSET; } - bool is_pv() const { return (bool)(genBound8 & 0x4); } - Bound bound() const { return (Bound)(genBound8 & 0x3); } - void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev); + Move move() const { return Move(move16); } + Value value() const { return Value(value16); } + Value eval() const { return Value(eval16); } + Depth depth() const { return Depth(depth8 + DEPTH_OFFSET); } + bool is_pv() const { return bool(genBound8 & 0x4); } + Bound bound() const { return Bound(genBound8 & 0x3); } + void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8); + // The returned age is a multiple of TranspositionTable::GENERATION_DELTA + uint8_t relative_age(const uint8_t generation8) const; -private: - friend class TranspositionTable; + private: + friend class TranspositionTable; - uint16_t key16; - uint8_t depth8; - uint8_t genBound8; - uint16_t move16; - int16_t value16; - int16_t eval16; + uint16_t key16; + uint8_t depth8; + uint8_t genBound8; + Move move16; + int16_t value16; + int16_t eval16; }; -/// A TranspositionTable is an array of Cluster, of size clusterCount. Each -/// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry -/// contains information on exactly one position. The size of a Cluster should -/// divide the size of a cache line for best performance, as the cacheline is -/// prefetched when possible. - +// A TranspositionTable is an array of Cluster, of size clusterCount. Each +// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry +// contains information on exactly one position. The size of a Cluster should +// divide the size of a cache line for best performance, as the cacheline is +// prefetched when possible. class TranspositionTable { - static constexpr int ClusterSize = 3; + static constexpr int ClusterSize = 3; - struct Cluster { - TTEntry entry[ClusterSize]; - char padding[2]; // Pad to 32 bytes - }; + struct Cluster { + TTEntry entry[ClusterSize]; + char padding[2]; // Pad to 32 bytes + }; - static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size"); + static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size"); - // Constants used to refresh the hash table periodically - static constexpr unsigned GENERATION_BITS = 3; // nb of bits reserved for other things - static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS); // increment for generation field - static constexpr int GENERATION_CYCLE = 255 + (1 << GENERATION_BITS); // cycle length - static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; // mask to pull out generation number + // Constants used to refresh the hash table periodically -public: - ~TranspositionTable() { aligned_large_pages_free(table); } - void new_search() { generation8 += GENERATION_DELTA; } // Lower bits are used for other things - TTEntry* probe(const Key key, bool& found) const; - int hashfull() const; - void resize(size_t mbSize); - void clear(); + // We have 8 bits available where the lowest 3 bits are + // reserved for other things. + static constexpr unsigned GENERATION_BITS = 3; + // increment for generation field + static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS); + // cycle length + static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA; + // mask to pull out generation number + static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; - TTEntry* first_entry(const Key key) const { - return &table[mul_hi64(key, clusterCount)].entry[0]; - } + public: + ~TranspositionTable() { aligned_large_pages_free(table); } -private: - friend struct TTEntry; + void new_search() { + // increment by delta to keep lower bits as is + generation8 += GENERATION_DELTA; + } - size_t clusterCount; - Cluster* table; - uint8_t generation8; // Size must be not bigger than TTEntry::genBound8 + TTEntry* probe(const Key key, bool& found) const; + int hashfull() const; + void resize(size_t mbSize, int threadCount); + void clear(size_t threadCount); + + TTEntry* first_entry(const Key key) const { + return &table[mul_hi64(key, clusterCount)].entry[0]; + } + + uint8_t generation() const { return generation8; } + + private: + friend struct TTEntry; + + size_t clusterCount; + Cluster* table = nullptr; + uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8 }; -extern TranspositionTable TT; +} // namespace Stockfish -} // namespace Stockfish - -#endif // #ifndef TT_H_INCLUDED +#endif // #ifndef TT_H_INCLUDED diff --git a/src/tune.cpp b/src/tune.cpp index a885845f..3e5ebe5e 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,100 +16,94 @@ along with this program. If not, see . */ +#include "tune.h" + #include #include +#include #include +#include -#include "types.h" -#include "misc.h" -#include "uci.h" +#include "ucioption.h" using std::string; namespace Stockfish { -bool Tune::update_on_last; -const UCI::Option* LastOption = nullptr; -static std::map TuneResults; +bool Tune::update_on_last; +const Option* LastOption = nullptr; +OptionsMap* Tune::options; + + +namespace { +std::map TuneResults; + +void on_tune(const Option& o) { + + if (!Tune::update_on_last || LastOption == &o) + Tune::read_options(); +} + + +void make_option(OptionsMap* options, const string& n, int v, const SetRange& r) { + + // Do not generate option when there is nothing to tune (ie. min = max) + if (r(v).first == r(v).second) + return; + + if (TuneResults.count(n)) + v = TuneResults[n]; + + (*options)[n] << Option(v, r(v).first, r(v).second, on_tune); + LastOption = &((*options)[n]); + + // Print formatted parameters, ready to be copy-pasted in Fishtest + std::cout << n << "," << v << "," << r(v).first << "," << r(v).second << "," + << (r(v).second - r(v).first) / 20.0 << "," + << "0.0020" << std::endl; +} +} string Tune::next(string& names, bool pop) { - string name; + string name; - do { - string token = names.substr(0, names.find(',')); + do + { + string token = names.substr(0, names.find(',')); - if (pop) - names.erase(0, token.size() + 1); + if (pop) + names.erase(0, token.size() + 1); - std::stringstream ws(token); - name += (ws >> token, token); // Remove trailing whitespace + std::stringstream ws(token); + name += (ws >> token, token); // Remove trailing whitespace - } while ( std::count(name.begin(), name.end(), '(') - - std::count(name.begin(), name.end(), ')')); + } while (std::count(name.begin(), name.end(), '(') - std::count(name.begin(), name.end(), ')')); - return name; + return name; } -static void on_tune(const UCI::Option& o) { - if (!Tune::update_on_last || LastOption == &o) - Tune::read_options(); +template<> +void Tune::Entry::init_option() { + make_option(options, name, value, range); } -static void make_option(const string& n, int v, const SetRange& r) { - - // Do not generate option when there is nothing to tune (ie. min = max) - if (r(v).first == r(v).second) - return; - - if (TuneResults.count(n)) - v = TuneResults[n]; - - Options[n] << UCI::Option(v, r(v).first, r(v).second, on_tune); - LastOption = &Options[n]; - - // Print formatted parameters, ready to be copy-pasted in Fishtest - std::cout << n << "," - << v << "," - << r(v).first << "," << r(v).second << "," - << (r(v).second - r(v).first) / 20.0 << "," - << "0.0020" - << std::endl; -} - -template<> void Tune::Entry::init_option() { make_option(name, value, range); } - -template<> void Tune::Entry::read_option() { - if (Options.count(name)) - value = int(Options[name]); -} - -template<> void Tune::Entry::init_option() { make_option(name, value, range); } - -template<> void Tune::Entry::read_option() { - if (Options.count(name)) - value = Value(int(Options[name])); -} - -template<> void Tune::Entry::init_option() { - make_option("m" + name, mg_value(value), range); - make_option("e" + name, eg_value(value), range); -} - -template<> void Tune::Entry::read_option() { - if (Options.count("m" + name)) - value = make_score(int(Options["m" + name]), eg_value(value)); - - if (Options.count("e" + name)) - value = make_score(mg_value(value), int(Options["e" + name])); +template<> +void Tune::Entry::read_option() { + if (options->count(name)) + value = int((*options)[name]); } // Instead of a variable here we have a PostUpdate function: just call it -template<> void Tune::Entry::init_option() {} -template<> void Tune::Entry::read_option() { value(); } +template<> +void Tune::Entry::init_option() {} +template<> +void Tune::Entry::read_option() { + value(); +} -} // namespace Stockfish +} // namespace Stockfish // Init options with tuning session results instead of default values. Useful to @@ -121,13 +115,10 @@ template<> void Tune::Entry::read_option() { value(); } // // Then paste the output below, as the function body -#include namespace Stockfish { -void Tune::read_results() { - - /* ...insert your values here... */ +void Tune::read_results() { /* ...insert your values here... */ } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/tune.h b/src/tune.h index 75ab484a..079614db 100644 --- a/src/tune.h +++ b/src/tune.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,145 +19,163 @@ #ifndef TUNE_H_INCLUDED #define TUNE_H_INCLUDED +#include #include #include -#include +#include // IWYU pragma: keep +#include #include namespace Stockfish { -typedef std::pair Range; // Option's min-max values -typedef Range (RangeFun) (int); +class OptionsMap; + +using Range = std::pair; // Option's min-max values +using RangeFun = Range(int); // Default Range function, to calculate Option's min-max values -inline Range default_range(int v) { - return v > 0 ? Range(0, 2 * v) : Range(2 * v, 0); -} +inline Range default_range(int v) { return v > 0 ? Range(0, 2 * v) : Range(2 * v, 0); } struct SetRange { - explicit SetRange(RangeFun f) : fun(f) {} - SetRange(int min, int max) : fun(nullptr), range(min, max) {} - Range operator()(int v) const { return fun ? fun(v) : range; } + explicit SetRange(RangeFun f) : + fun(f) {} + SetRange(int min, int max) : + fun(nullptr), + range(min, max) {} + Range operator()(int v) const { return fun ? fun(v) : range; } - RangeFun* fun; - Range range; + RangeFun* fun; + Range range; }; #define SetDefaultRange SetRange(default_range) -/// Tune class implements the 'magic' code that makes the setup of a fishtest -/// tuning session as easy as it can be. Mainly you have just to remove const -/// qualifiers from the variables you want to tune and flag them for tuning, so -/// if you have: -/// -/// const Score myScore = S(10, 15); -/// const Value myValue[][2] = { { V(100), V(20) }, { V(7), V(78) } }; -/// -/// If you have a my_post_update() function to run after values have been updated, -/// and a my_range() function to set custom Option's min-max values, then you just -/// remove the 'const' qualifiers and write somewhere below in the file: -/// -/// TUNE(SetRange(my_range), myScore, myValue, my_post_update); -/// -/// You can also set the range directly, and restore the default at the end -/// -/// TUNE(SetRange(-100, 100), myScore, SetDefaultRange); -/// -/// In case update function is slow and you have many parameters, you can add: -/// -/// UPDATE_ON_LAST(); -/// -/// And the values update, including post update function call, will be done only -/// once, after the engine receives the last UCI option, that is the one defined -/// and created as the last one, so the GUI should send the options in the same -/// order in which have been defined. +// Tune class implements the 'magic' code that makes the setup of a fishtest tuning +// session as easy as it can be. Mainly you have just to remove const qualifiers +// from the variables you want to tune and flag them for tuning, so if you have: +// +// const Value myValue[][2] = { { V(100), V(20) }, { V(7), V(78) } }; +// +// If you have a my_post_update() function to run after values have been updated, +// and a my_range() function to set custom Option's min-max values, then you just +// remove the 'const' qualifiers and write somewhere below in the file: +// +// TUNE(SetRange(my_range), myValue, my_post_update); +// +// You can also set the range directly, and restore the default at the end +// +// TUNE(SetRange(-100, 100), myValue, SetDefaultRange); +// +// In case update function is slow and you have many parameters, you can add: +// +// UPDATE_ON_LAST(); +// +// And the values update, including post update function call, will be done only +// once, after the engine receives the last UCI option, that is the one defined +// and created as the last one, so the GUI should send the options in the same +// order in which have been defined. class Tune { - typedef void (PostUpdate) (); // Post-update function + using PostUpdate = void(); // Post-update function - Tune() { read_results(); } - Tune(const Tune&) = delete; - void operator=(const Tune&) = delete; - void read_results(); + Tune() { read_results(); } + Tune(const Tune&) = delete; + void operator=(const Tune&) = delete; + void read_results(); - static Tune& instance() { static Tune t; return t; } // Singleton + static Tune& instance() { + static Tune t; + return t; + } // Singleton - // Use polymorphism to accommodate Entry of different types in the same vector - struct EntryBase { - virtual ~EntryBase() = default; - virtual void init_option() = 0; - virtual void read_option() = 0; - }; + // Use polymorphism to accommodate Entry of different types in the same vector + struct EntryBase { + virtual ~EntryBase() = default; + virtual void init_option() = 0; + virtual void read_option() = 0; + }; - template - struct Entry : public EntryBase { + template + struct Entry: public EntryBase { - static_assert(!std::is_const::value, "Parameter cannot be const!"); + static_assert(!std::is_const_v, "Parameter cannot be const!"); - static_assert( std::is_same::value - || std::is_same::value - || std::is_same::value - || std::is_same::value, "Parameter type not supported!"); + static_assert(std::is_same_v || std::is_same_v, + "Parameter type not supported!"); - Entry(const std::string& n, T& v, const SetRange& r) : name(n), value(v), range(r) {} - void operator=(const Entry&) = delete; // Because 'value' is a reference - void init_option() override; - void read_option() override; + Entry(const std::string& n, T& v, const SetRange& r) : + name(n), + value(v), + range(r) {} + void operator=(const Entry&) = delete; // Because 'value' is a reference + void init_option() override; + void read_option() override; - std::string name; - T& value; - SetRange range; - }; + std::string name; + T& value; + SetRange range; + }; - // Our facility to fill the container, each Entry corresponds to a parameter - // to tune. We use variadic templates to deal with an unspecified number of - // entries, each one of a possible different type. - static std::string next(std::string& names, bool pop = true); + // Our facility to fill the container, each Entry corresponds to a parameter + // to tune. We use variadic templates to deal with an unspecified number of + // entries, each one of a possible different type. + static std::string next(std::string& names, bool pop = true); - int add(const SetRange&, std::string&&) { return 0; } + int add(const SetRange&, std::string&&) { return 0; } - template - int add(const SetRange& range, std::string&& names, T& value, Args&&... args) { - list.push_back(std::unique_ptr(new Entry(next(names), value, range))); - return add(range, std::move(names), args...); - } + template + int add(const SetRange& range, std::string&& names, T& value, Args&&... args) { + list.push_back(std::unique_ptr(new Entry(next(names), value, range))); + return add(range, std::move(names), args...); + } - // Template specialization for arrays: recursively handle multi-dimensional arrays - template - int add(const SetRange& range, std::string&& names, T (&value)[N], Args&&... args) { - for (size_t i = 0; i < N; i++) - add(range, next(names, i == N - 1) + "[" + std::to_string(i) + "]", value[i]); - return add(range, std::move(names), args...); - } + // Template specialization for arrays: recursively handle multi-dimensional arrays + template + int add(const SetRange& range, std::string&& names, T (&value)[N], Args&&... args) { + for (size_t i = 0; i < N; i++) + add(range, next(names, i == N - 1) + "[" + std::to_string(i) + "]", value[i]); + return add(range, std::move(names), args...); + } - // Template specialization for SetRange - template - int add(const SetRange&, std::string&& names, SetRange& value, Args&&... args) { - return add(value, (next(names), std::move(names)), args...); - } + // Template specialization for SetRange + template + int add(const SetRange&, std::string&& names, SetRange& value, Args&&... args) { + return add(value, (next(names), std::move(names)), args...); + } - std::vector> list; + std::vector> list; -public: - template - static int add(const std::string& names, Args&&... args) { - return instance().add(SetDefaultRange, names.substr(1, names.size() - 2), args...); // Remove trailing parenthesis - } - static void init() { for (auto& e : instance().list) e->init_option(); read_options(); } // Deferred, due to UCI::Options access - static void read_options() { for (auto& e : instance().list) e->read_option(); } - static bool update_on_last; + public: + template + static int add(const std::string& names, Args&&... args) { + return instance().add(SetDefaultRange, names.substr(1, names.size() - 2), + args...); // Remove trailing parenthesis + } + static void init(OptionsMap& o) { + options = &o; + for (auto& e : instance().list) + e->init_option(); + read_options(); + } // Deferred, due to UCIEngine::Options access + static void read_options() { + for (auto& e : instance().list) + e->read_option(); + } + + static bool update_on_last; + static OptionsMap* options; }; -// Some macro magic :-) we define a dummy int variable that compiler initializes calling Tune::add() +// Some macro magic :-) we define a dummy int variable that the compiler initializes calling Tune::add() #define STRINGIFY(x) #x -#define UNIQUE2(x, y) x ## y -#define UNIQUE(x, y) UNIQUE2(x, y) // Two indirection levels to expand __LINE__ +#define UNIQUE2(x, y) x##y +#define UNIQUE(x, y) UNIQUE2(x, y) // Two indirection levels to expand __LINE__ #define TUNE(...) int UNIQUE(p, __LINE__) = Tune::add(STRINGIFY((__VA_ARGS__)), __VA_ARGS__) #define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef TUNE_H_INCLUDED +#endif // #ifndef TUNE_H_INCLUDED diff --git a/src/types.h b/src/types.h index ddb2bb57..b49c789b 100644 --- a/src/types.h +++ b/src/types.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,472 +17,403 @@ */ #ifndef TYPES_H_INCLUDED -#define TYPES_H_INCLUDED + #define TYPES_H_INCLUDED -/// When compiling with provided Makefile (e.g. for Linux and OSX), configuration -/// is done automatically. To get started type 'make help'. -/// -/// When Makefile is not used (e.g. with Microsoft Visual Studio) some switches -/// need to be set manually: -/// -/// -DNDEBUG | Disable debugging mode. Always use this for release. -/// -/// -DNO_PREFETCH | Disable use of prefetch asm-instruction. You may need this to -/// | run on some very old machines. -/// -/// -DUSE_POPCNT | Add runtime support for use of popcnt asm-instruction. Works -/// | only in 64-bit mode and requires hardware with popcnt support. -/// -/// -DUSE_PEXT | Add runtime support for use of pext asm-instruction. Works -/// | only in 64-bit mode and requires hardware with pext support. +// When compiling with provided Makefile (e.g. for Linux and OSX), configuration +// is done automatically. To get started type 'make help'. +// +// When Makefile is not used (e.g. with Microsoft Visual Studio) some switches +// need to be set manually: +// +// -DNDEBUG | Disable debugging mode. Always use this for release. +// +// -DNO_PREFETCH | Disable use of prefetch asm-instruction. You may need this to +// | run on some very old machines. +// +// -DUSE_POPCNT | Add runtime support for use of popcnt asm-instruction. Works +// | only in 64-bit mode and requires hardware with popcnt support. +// +// -DUSE_PEXT | Add runtime support for use of pext asm-instruction. Works +// | only in 64-bit mode and requires hardware with pext support. -#include -#include -#include -#include -#include + #include + #include -#if defined(_MSC_VER) -// Disable some silly and noisy warning from MSVC compiler -#pragma warning(disable: 4127) // Conditional expression is constant -#pragma warning(disable: 4146) // Unary minus operator applied to unsigned type -#pragma warning(disable: 4800) // Forcing value to bool 'true' or 'false' -#endif + #if defined(_MSC_VER) + // Disable some silly and noisy warnings from MSVC compiler + #pragma warning(disable: 4127) // Conditional expression is constant + #pragma warning(disable: 4146) // Unary minus operator applied to unsigned type + #pragma warning(disable: 4800) // Forcing value to bool 'true' or 'false' + #endif -/// Predefined macros hell: -/// -/// __GNUC__ Compiler is gcc, Clang or Intel on Linux -/// __INTEL_COMPILER Compiler is Intel -/// _MSC_VER Compiler is MSVC or Intel on Windows -/// _WIN32 Building on Windows (any) -/// _WIN64 Building on Windows 64 bit +// Predefined macros hell: +// +// __GNUC__ Compiler is GCC, Clang or ICX +// __clang__ Compiler is Clang or ICX +// __INTEL_LLVM_COMPILER Compiler is ICX +// _MSC_VER Compiler is MSVC +// _WIN32 Building on Windows (any) +// _WIN64 Building on Windows 64 bit -#if defined(__GNUC__ ) && (__GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ <= 2)) && defined(_WIN32) && !defined(__clang__) -#define ALIGNAS_ON_STACK_VARIABLES_BROKEN -#endif + #if defined(__GNUC__) && (__GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ <= 2)) \ + && defined(_WIN32) && !defined(__clang__) + #define ALIGNAS_ON_STACK_VARIABLES_BROKEN + #endif -#define ASSERT_ALIGNED(ptr, alignment) assert(reinterpret_cast(ptr) % alignment == 0) + #define ASSERT_ALIGNED(ptr, alignment) assert(reinterpret_cast(ptr) % alignment == 0) -#if defined(_WIN64) && defined(_MSC_VER) // No Makefile used -# include // Microsoft header for _BitScanForward64() -# define IS_64BIT -#endif + #if defined(_WIN64) && defined(_MSC_VER) // No Makefile used + #include // Microsoft header for _BitScanForward64() + #define IS_64BIT + #endif -#if defined(USE_POPCNT) && (defined(__INTEL_COMPILER) || defined(_MSC_VER)) -# include // Intel and Microsoft header for _mm_popcnt_u64() -#endif + #if defined(USE_POPCNT) && defined(_MSC_VER) + #include // Microsoft header for _mm_popcnt_u64() + #endif -#if !defined(NO_PREFETCH) && (defined(__INTEL_COMPILER) || defined(_MSC_VER)) -# include // Intel and Microsoft header for _mm_prefetch() -#endif + #if !defined(NO_PREFETCH) && defined(_MSC_VER) + #include // Microsoft header for _mm_prefetch() + #endif -#if defined(USE_PEXT) -# include // Header for _pext_u64() intrinsic -# define pext(b, m) _pext_u64(b, m) -#else -# define pext(b, m) 0 -#endif + #if defined(USE_PEXT) + #include // Header for _pext_u64() intrinsic + #define pext(b, m) _pext_u64(b, m) + #else + #define pext(b, m) 0 + #endif namespace Stockfish { -#ifdef USE_POPCNT + #ifdef USE_POPCNT constexpr bool HasPopCnt = true; -#else + #else constexpr bool HasPopCnt = false; -#endif + #endif -#ifdef USE_PEXT + #ifdef USE_PEXT constexpr bool HasPext = true; -#else + #else constexpr bool HasPext = false; -#endif + #endif -#ifdef IS_64BIT + #ifdef IS_64BIT constexpr bool Is64Bit = true; -#else + #else constexpr bool Is64Bit = false; -#endif + #endif -typedef uint64_t Key; -typedef uint64_t Bitboard; +using Key = uint64_t; +using Bitboard = uint64_t; constexpr int MAX_MOVES = 256; constexpr int MAX_PLY = 246; -/// A move needs 16 bits to be stored -/// -/// bit 0- 5: destination square (from 0 to 63) -/// bit 6-11: origin square (from 0 to 63) -/// bit 12-13: promotion piece type - 2 (from KNIGHT-2 to QUEEN-2) -/// bit 14-15: special move flag: promotion (1), en passant (2), castling (3) -/// NOTE: en passant bit is set only when a pawn can be captured -/// -/// Special cases are MOVE_NONE and MOVE_NULL. We can sneak these in because in -/// any normal move destination square is always different from origin square -/// while MOVE_NONE and MOVE_NULL have the same origin and destination square. - -enum Move : int { - MOVE_NONE, - MOVE_NULL = 65 -}; - -enum MoveType { - NORMAL, - PROMOTION = 1 << 14, - EN_PASSANT = 2 << 14, - CASTLING = 3 << 14 -}; - enum Color { - WHITE, BLACK, COLOR_NB = 2 + WHITE, + BLACK, + COLOR_NB = 2 }; enum CastlingRights { - NO_CASTLING, - WHITE_OO, - WHITE_OOO = WHITE_OO << 1, - BLACK_OO = WHITE_OO << 2, - BLACK_OOO = WHITE_OO << 3, + NO_CASTLING, + WHITE_OO, + WHITE_OOO = WHITE_OO << 1, + BLACK_OO = WHITE_OO << 2, + BLACK_OOO = WHITE_OO << 3, - KING_SIDE = WHITE_OO | BLACK_OO, - QUEEN_SIDE = WHITE_OOO | BLACK_OOO, - WHITE_CASTLING = WHITE_OO | WHITE_OOO, - BLACK_CASTLING = BLACK_OO | BLACK_OOO, - ANY_CASTLING = WHITE_CASTLING | BLACK_CASTLING, + KING_SIDE = WHITE_OO | BLACK_OO, + QUEEN_SIDE = WHITE_OOO | BLACK_OOO, + WHITE_CASTLING = WHITE_OO | WHITE_OOO, + BLACK_CASTLING = BLACK_OO | BLACK_OOO, + ANY_CASTLING = WHITE_CASTLING | BLACK_CASTLING, - CASTLING_RIGHT_NB = 16 -}; - -enum Phase { - PHASE_ENDGAME, - PHASE_MIDGAME = 128, - MG = 0, EG = 1, PHASE_NB = 2 -}; - -enum ScaleFactor { - SCALE_FACTOR_DRAW = 0, - SCALE_FACTOR_NORMAL = 64, - SCALE_FACTOR_MAX = 128, - SCALE_FACTOR_NONE = 255 + CASTLING_RIGHT_NB = 16 }; enum Bound { - BOUND_NONE, - BOUND_UPPER, - BOUND_LOWER, - BOUND_EXACT = BOUND_UPPER | BOUND_LOWER + BOUND_NONE, + BOUND_UPPER, + BOUND_LOWER, + BOUND_EXACT = BOUND_UPPER | BOUND_LOWER }; -enum Value : int { - VALUE_ZERO = 0, - VALUE_DRAW = 0, - VALUE_KNOWN_WIN = 10000, - VALUE_MATE = 32000, - VALUE_INFINITE = 32001, - VALUE_NONE = 32002, +// Value is used as an alias for int16_t, this is done to differentiate between +// a search value and any other integer value. The values used in search are always +// supposed to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range. +using Value = int; - VALUE_TB_WIN_IN_MAX_PLY = VALUE_MATE - 2 * MAX_PLY, - VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY, - VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY, - VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY, +constexpr Value VALUE_ZERO = 0; +constexpr Value VALUE_DRAW = 0; +constexpr Value VALUE_NONE = 32002; +constexpr Value VALUE_INFINITE = 32001; - PawnValueMg = 126, PawnValueEg = 208, - KnightValueMg = 781, KnightValueEg = 854, - BishopValueMg = 825, BishopValueEg = 915, - RookValueMg = 1276, RookValueEg = 1380, - QueenValueMg = 2538, QueenValueEg = 2682, +constexpr Value VALUE_MATE = 32000; +constexpr Value VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY; +constexpr Value VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY; - MidgameLimit = 15258, EndgameLimit = 3915 -}; +constexpr Value VALUE_TB = VALUE_MATE_IN_MAX_PLY - 1; +constexpr Value VALUE_TB_WIN_IN_MAX_PLY = VALUE_TB - MAX_PLY; +constexpr Value VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY; +// In the code, we make the assumption that these values +// are such that non_pawn_material() can be used to uniquely +// identify the material on the board. +constexpr Value PawnValue = 208; +constexpr Value KnightValue = 781; +constexpr Value BishopValue = 825; +constexpr Value RookValue = 1276; +constexpr Value QueenValue = 2538; + + +// clang-format off enum PieceType { - NO_PIECE_TYPE, PAWN, KNIGHT, BISHOP, ROOK, QUEEN, KING, - ALL_PIECES = 0, - PIECE_TYPE_NB = 8 + NO_PIECE_TYPE, PAWN, KNIGHT, BISHOP, ROOK, QUEEN, KING, + ALL_PIECES = 0, + PIECE_TYPE_NB = 8 }; enum Piece { - NO_PIECE, - W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, - B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING, - PIECE_NB = 16 + NO_PIECE, + W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, + B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING, + PIECE_NB = 16 }; +// clang-format on -constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { - { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO, - VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO }, - { VALUE_ZERO, PawnValueEg, KnightValueEg, BishopValueEg, RookValueEg, QueenValueEg, VALUE_ZERO, VALUE_ZERO, - VALUE_ZERO, PawnValueEg, KnightValueEg, BishopValueEg, RookValueEg, QueenValueEg, VALUE_ZERO, VALUE_ZERO } -}; +constexpr Value PieceValue[PIECE_NB] = { + VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO, + VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO}; -typedef int Depth; +using Depth = int; enum : int { - DEPTH_QS_CHECKS = 0, - DEPTH_QS_NO_CHECKS = -1, - DEPTH_QS_RECAPTURES = -5, + DEPTH_QS_CHECKS = 0, + DEPTH_QS_NO_CHECKS = -1, - DEPTH_NONE = -6, + DEPTH_NONE = -6, - DEPTH_OFFSET = -7 // value used only for TT entry occupancy check + DEPTH_OFFSET = -7 // value used only for TT entry occupancy check }; +// clang-format off enum Square : int { - SQ_A1, SQ_B1, SQ_C1, SQ_D1, SQ_E1, SQ_F1, SQ_G1, SQ_H1, - SQ_A2, SQ_B2, SQ_C2, SQ_D2, SQ_E2, SQ_F2, SQ_G2, SQ_H2, - SQ_A3, SQ_B3, SQ_C3, SQ_D3, SQ_E3, SQ_F3, SQ_G3, SQ_H3, - SQ_A4, SQ_B4, SQ_C4, SQ_D4, SQ_E4, SQ_F4, SQ_G4, SQ_H4, - SQ_A5, SQ_B5, SQ_C5, SQ_D5, SQ_E5, SQ_F5, SQ_G5, SQ_H5, - SQ_A6, SQ_B6, SQ_C6, SQ_D6, SQ_E6, SQ_F6, SQ_G6, SQ_H6, - SQ_A7, SQ_B7, SQ_C7, SQ_D7, SQ_E7, SQ_F7, SQ_G7, SQ_H7, - SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8, - SQ_NONE, + SQ_A1, SQ_B1, SQ_C1, SQ_D1, SQ_E1, SQ_F1, SQ_G1, SQ_H1, + SQ_A2, SQ_B2, SQ_C2, SQ_D2, SQ_E2, SQ_F2, SQ_G2, SQ_H2, + SQ_A3, SQ_B3, SQ_C3, SQ_D3, SQ_E3, SQ_F3, SQ_G3, SQ_H3, + SQ_A4, SQ_B4, SQ_C4, SQ_D4, SQ_E4, SQ_F4, SQ_G4, SQ_H4, + SQ_A5, SQ_B5, SQ_C5, SQ_D5, SQ_E5, SQ_F5, SQ_G5, SQ_H5, + SQ_A6, SQ_B6, SQ_C6, SQ_D6, SQ_E6, SQ_F6, SQ_G6, SQ_H6, + SQ_A7, SQ_B7, SQ_C7, SQ_D7, SQ_E7, SQ_F7, SQ_G7, SQ_H7, + SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8, + SQ_NONE, - SQUARE_ZERO = 0, - SQUARE_NB = 64 + SQUARE_ZERO = 0, + SQUARE_NB = 64 }; +// clang-format on enum Direction : int { - NORTH = 8, - EAST = 1, - SOUTH = -NORTH, - WEST = -EAST, + NORTH = 8, + EAST = 1, + SOUTH = -NORTH, + WEST = -EAST, - NORTH_EAST = NORTH + EAST, - SOUTH_EAST = SOUTH + EAST, - SOUTH_WEST = SOUTH + WEST, - NORTH_WEST = NORTH + WEST + NORTH_EAST = NORTH + EAST, + SOUTH_EAST = SOUTH + EAST, + SOUTH_WEST = SOUTH + WEST, + NORTH_WEST = NORTH + WEST }; enum File : int { - FILE_A, FILE_B, FILE_C, FILE_D, FILE_E, FILE_F, FILE_G, FILE_H, FILE_NB + FILE_A, + FILE_B, + FILE_C, + FILE_D, + FILE_E, + FILE_F, + FILE_G, + FILE_H, + FILE_NB }; enum Rank : int { - RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB + RANK_1, + RANK_2, + RANK_3, + RANK_4, + RANK_5, + RANK_6, + RANK_7, + RANK_8, + RANK_NB }; // Keep track of what a move changes on the board (used by NNUE) struct DirtyPiece { - // Number of changed pieces - int dirty_num; + // Number of changed pieces + int dirty_num; - // Max 3 pieces can change in one move. A promotion with capture moves - // both the pawn and the captured piece to SQ_NONE and the piece promoted - // to from SQ_NONE to the capture square. - Piece piece[3]; + // Max 3 pieces can change in one move. A promotion with capture moves + // both the pawn and the captured piece to SQ_NONE and the piece promoted + // to from SQ_NONE to the capture square. + Piece piece[3]; - // From and to squares, which may be SQ_NONE - Square from[3]; - Square to[3]; + // From and to squares, which may be SQ_NONE + Square from[3]; + Square to[3]; }; -/// Score enum stores a middlegame and an endgame value in a single integer (enum). -/// The least significant 16 bits are used to store the middlegame value and the -/// upper 16 bits are used to store the endgame value. We have to take care to -/// avoid left-shifting a signed int to avoid undefined behavior. -enum Score : int { SCORE_ZERO }; + #define ENABLE_INCR_OPERATORS_ON(T) \ + inline T& operator++(T& d) { return d = T(int(d) + 1); } \ + inline T& operator--(T& d) { return d = T(int(d) - 1); } -constexpr Score make_score(int mg, int eg) { - return Score((int)((unsigned int)eg << 16) + mg); -} - -/// Extracting the signed lower and upper 16 bits is not so trivial because -/// according to the standard a simple cast to short is implementation defined -/// and so is a right shift of a signed integer. -inline Value eg_value(Score s) { - union { uint16_t u; int16_t s; } eg = { uint16_t(unsigned(s + 0x8000) >> 16) }; - return Value(eg.s); -} - -inline Value mg_value(Score s) { - union { uint16_t u; int16_t s; } mg = { uint16_t(unsigned(s)) }; - return Value(mg.s); -} - -#define ENABLE_BASE_OPERATORS_ON(T) \ -constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \ -constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \ -constexpr T operator-(T d) { return T(-int(d)); } \ -inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \ -inline T& operator-=(T& d1, int d2) { return d1 = d1 - d2; } - -#define ENABLE_INCR_OPERATORS_ON(T) \ -inline T& operator++(T& d) { return d = T(int(d) + 1); } \ -inline T& operator--(T& d) { return d = T(int(d) - 1); } - -#define ENABLE_FULL_OPERATORS_ON(T) \ -ENABLE_BASE_OPERATORS_ON(T) \ -constexpr T operator*(int i, T d) { return T(i * int(d)); } \ -constexpr T operator*(T d, int i) { return T(int(d) * i); } \ -constexpr T operator/(T d, int i) { return T(int(d) / i); } \ -constexpr int operator/(T d1, T d2) { return int(d1) / int(d2); } \ -inline T& operator*=(T& d, int i) { return d = T(int(d) * i); } \ -inline T& operator/=(T& d, int i) { return d = T(int(d) / i); } - -ENABLE_FULL_OPERATORS_ON(Value) -ENABLE_FULL_OPERATORS_ON(Direction) - -ENABLE_INCR_OPERATORS_ON(Piece) ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(File) ENABLE_INCR_OPERATORS_ON(Rank) -ENABLE_BASE_OPERATORS_ON(Score) + #undef ENABLE_INCR_OPERATORS_ON -#undef ENABLE_FULL_OPERATORS_ON -#undef ENABLE_INCR_OPERATORS_ON -#undef ENABLE_BASE_OPERATORS_ON +constexpr Direction operator+(Direction d1, Direction d2) { return Direction(int(d1) + int(d2)); } +constexpr Direction operator*(int i, Direction d) { return Direction(i * int(d)); } -/// Additional operators to add a Direction to a Square +// Additional operators to add a Direction to a Square constexpr Square operator+(Square s, Direction d) { return Square(int(s) + int(d)); } constexpr Square operator-(Square s, Direction d) { return Square(int(s) - int(d)); } -inline Square& operator+=(Square& s, Direction d) { return s = s + d; } -inline Square& operator-=(Square& s, Direction d) { return s = s - d; } +inline Square& operator+=(Square& s, Direction d) { return s = s + d; } +inline Square& operator-=(Square& s, Direction d) { return s = s - d; } -/// Only declared but not defined. We don't want to multiply two scores due to -/// a very high risk of overflow. So user should explicitly convert to integer. -Score operator*(Score, Score) = delete; +// Toggle color +constexpr Color operator~(Color c) { return Color(c ^ BLACK); } -/// Division of a Score must be handled separately for each term -inline Score operator/(Score s, int i) { - return make_score(mg_value(s) / i, eg_value(s) / i); -} +// Swap A1 <-> A8 +constexpr Square flip_rank(Square s) { return Square(s ^ SQ_A8); } -/// Multiplication of a Score by an integer. We check for overflow in debug mode. -inline Score operator*(Score s, int i) { +// Swap A1 <-> H1 +constexpr Square flip_file(Square s) { return Square(s ^ SQ_H1); } - Score result = Score(int(s) * i); - - assert(eg_value(result) == (i * eg_value(s))); - assert(mg_value(result) == (i * mg_value(s))); - assert((i == 0) || (result / i) == s); - - return result; -} - -/// Multiplication of a Score by a boolean -inline Score operator*(Score s, bool b) { - return b ? s : SCORE_ZERO; -} - -constexpr Color operator~(Color c) { - return Color(c ^ BLACK); // Toggle color -} - -constexpr Square flip_rank(Square s) { // Swap A1 <-> A8 - return Square(s ^ SQ_A8); -} - -constexpr Square flip_file(Square s) { // Swap A1 <-> H1 - return Square(s ^ SQ_H1); -} - -constexpr Piece operator~(Piece pc) { - return Piece(pc ^ 8); // Swap color of piece B_KNIGHT <-> W_KNIGHT -} +// Swap color of piece B_KNIGHT <-> W_KNIGHT +constexpr Piece operator~(Piece pc) { return Piece(pc ^ 8); } constexpr CastlingRights operator&(Color c, CastlingRights cr) { - return CastlingRights((c == WHITE ? WHITE_CASTLING : BLACK_CASTLING) & cr); + return CastlingRights((c == WHITE ? WHITE_CASTLING : BLACK_CASTLING) & cr); } +constexpr Value mate_in(int ply) { return VALUE_MATE - ply; } constexpr Value mate_in(int ply) { Value zero = Value(0); return zero - (VALUE_MATE - ply); } +constexpr Value mated_in(int ply) { return -VALUE_MATE + ply; } constexpr Value mated_in(int ply) { Value zero = Value(0); return zero - (-VALUE_MATE + ply); } -constexpr Square make_square(File f, Rank r) { - return Square((r << 3) + f); -} +constexpr Square make_square(File f, Rank r) { return Square((r << 3) + f); } -constexpr Piece make_piece(Color c, PieceType pt) { - return Piece((c << 3) + pt); -} +constexpr Piece make_piece(Color c, PieceType pt) { return Piece((c << 3) + pt); } -constexpr PieceType type_of(Piece pc) { - return PieceType(pc & 7); -} +constexpr PieceType type_of(Piece pc) { return PieceType(pc & 7); } inline Color color_of(Piece pc) { - assert(pc != NO_PIECE); - return Color(pc >> 3); + assert(pc != NO_PIECE); + return Color(pc >> 3); } -constexpr bool is_ok(Square s) { - return s >= SQ_A1 && s <= SQ_H8; -} +constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } -constexpr File file_of(Square s) { - return File(s & 7); -} +constexpr File file_of(Square s) { return File(s & 7); } -constexpr Rank rank_of(Square s) { - return Rank(s >> 3); -} +constexpr Rank rank_of(Square s) { return Rank(s >> 3); } -constexpr Square relative_square(Color c, Square s) { - return Square(s ^ (c * 56)); -} +constexpr Square relative_square(Color c, Square s) { return Square(s ^ (c * 56)); } -constexpr Rank relative_rank(Color c, Rank r) { - return Rank(r ^ (c * 7)); -} +constexpr Rank relative_rank(Color c, Rank r) { return Rank(r ^ (c * 7)); } -constexpr Rank relative_rank(Color c, Square s) { - return relative_rank(c, rank_of(s)); -} +constexpr Rank relative_rank(Color c, Square s) { return relative_rank(c, rank_of(s)); } -constexpr Direction pawn_push(Color c) { - return c == WHITE ? NORTH : SOUTH; -} +constexpr Direction pawn_push(Color c) { return c == WHITE ? NORTH : SOUTH; } -constexpr Square from_sq(Move m) { - return Square((m >> 6) & 0x3F); -} -constexpr Square to_sq(Move m) { - return Square(m & 0x3F); -} - -constexpr int from_to(Move m) { - return m & 0xFFF; -} - -constexpr MoveType type_of(Move m) { - return MoveType(m & (3 << 14)); -} - -constexpr PieceType promotion_type(Move m) { - return PieceType(((m >> 12) & 3) + KNIGHT); -} - -constexpr Move make_move(Square from, Square to) { - return Move((from << 6) + to); -} - -template -constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { - return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); -} - -constexpr bool is_ok(Move m) { - return from_sq(m) != to_sq(m); // Catch MOVE_NULL and MOVE_NONE -} - -/// Based on a congruential pseudo random number generator +// Based on a congruential pseudo-random number generator constexpr Key make_key(uint64_t seed) { - return seed * 6364136223846793005ULL + 1442695040888963407ULL; + return seed * 6364136223846793005ULL + 1442695040888963407ULL; } -} // namespace Stockfish -#endif // #ifndef TYPES_H_INCLUDED +enum MoveType { + NORMAL, + PROMOTION = 1 << 14, + EN_PASSANT = 2 << 14, + CASTLING = 3 << 14 +}; -#include "tune.h" // Global visibility to tuning setup +// A move needs 16 bits to be stored +// +// bit 0- 5: destination square (from 0 to 63) +// bit 6-11: origin square (from 0 to 63) +// bit 12-13: promotion piece type - 2 (from KNIGHT-2 to QUEEN-2) +// bit 14-15: special move flag: promotion (1), en passant (2), castling (3) +// NOTE: en passant bit is set only when a pawn can be captured +// +// Special cases are Move::none() and Move::null(). We can sneak these in because in +// any normal move destination square is always different from origin square +// while Move::none() and Move::null() have the same origin and destination square. +class Move { + public: + Move() = default; + constexpr explicit Move(std::uint16_t d) : + data(d) {} + + constexpr Move(Square from, Square to) : + data((from << 6) + to) {} + + template + static constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { + return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); + } + + constexpr Square from_sq() const { + assert(is_ok()); + return Square((data >> 6) & 0x3F); + } + + constexpr Square to_sq() const { + assert(is_ok()); + return Square(data & 0x3F); + } + + constexpr int from_to() const { return data & 0xFFF; } + + constexpr MoveType type_of() const { return MoveType(data & (3 << 14)); } + + constexpr PieceType promotion_type() const { return PieceType(((data >> 12) & 3) + KNIGHT); } + + constexpr bool is_ok() const { return none().data != data && null().data != data; } + + static constexpr Move null() { return Move(65); } + static constexpr Move none() { return Move(0); } + + constexpr bool operator==(const Move& m) const { return data == m.data; } + constexpr bool operator!=(const Move& m) const { return data != m.data; } + + constexpr explicit operator bool() const { return data != 0; } + + constexpr std::uint16_t raw() const { return data; } + + struct MoveHash { + std::size_t operator()(const Move& m) const { return make_key(m.data); } + }; + + protected: + std::uint16_t data; +}; + +} // namespace Stockfish + +#endif // #ifndef TYPES_H_INCLUDED + +#include "tune.h" // Global visibility to tuning setup diff --git a/src/uci.cpp b/src/uci.cpp index 741241b3..cb686a02 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,50 +16,305 @@ along with this program. If not, see . */ -#include -#include -#include -#include -#include +#include "uci.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark.h" +#include "engine.h" #include "evaluate.h" #include "movegen.h" #include "position.h" +#include "score.h" #include "search.h" -#include "thread.h" -#include "timeman.h" -#include "tt.h" -#include "uci.h" #include "syzygy/tbprobe.h" - -using namespace std; +#include "types.h" +#include "ucioption.h" namespace Stockfish { -extern vector setup_bench(const Position&, istream&); +constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; +constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; -namespace { +template +struct overload: Ts... { + using Ts::operator()...; +}; - // FEN string of the initial position, normal chess - const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; +template +overload(Ts...) -> overload; + +UCIEngine::UCIEngine(int argc, char** argv) : + engine(argv[0]), + cli(argc, argv) { + + auto& options = engine.get_options(); + + options["Debug Log File"] << Option("", [](const Option& o) { start_logger(o); }); + + options["Threads"] << Option(1, 1, 1024, [this](const Option&) { engine.resize_threads(); }); + + options["Hash"] << Option(16, 1, MaxHashMB, [this](const Option& o) { engine.set_tt_size(o); }); + + options["Clear Hash"] << Option([this](const Option&) { engine.search_clear(); }); + options["Ponder"] << Option(false); + options["MultiPV"] << Option(1, 1, MAX_MOVES); + options["Skill Level"] << Option(20, 0, 20); + options["Move Overhead"] << Option(10, 0, 5000); + options["nodestime"] << Option(0, 0, 10000); + options["UCI_Chess960"] << Option(false); + options["UCI_LimitStrength"] << Option(false); + options["UCI_Elo"] << Option(1320, 1320, 3190); + options["UCI_ShowWDL"] << Option(false); + options["SyzygyPath"] << Option("", [](const Option& o) { Tablebases::init(o); }); + options["SyzygyProbeDepth"] << Option(1, 1, 100); + options["Syzygy50MoveRule"] << Option(true); + options["SyzygyProbeLimit"] << Option(7, 0, 7); + options["EvalFile"] << Option(EvalFileDefaultNameBig, + [this](const Option& o) { engine.load_big_network(o); }); + options["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, + [this](const Option& o) { engine.load_small_network(o); }); - // position() is called when engine receives the "position" UCI command. - // The function sets up the position described in the given FEN string ("fen") - // or the starting position ("startpos") and then makes the moves given in the - // following move list ("moves"). + engine.set_on_iter([](const auto& i) { on_iter(i); }); + engine.set_on_update_no_moves([](const auto& i) { on_update_no_moves(i); }); + engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); + engine.set_on_bestmove([](const auto& bm, const auto& p) { on_bestmove(bm, p); }); - void position(Position& pos, istringstream& is, StateListPtr& states) { + engine.load_networks(); + engine.resize_threads(); + engine.search_clear(); // After threads are up +} - Move m; - string token, fen; +void UCIEngine::loop() { + + std::string token, cmd; + + for (int i = 1; i < cli.argc; ++i) + cmd += std::string(cli.argv[i]) + " "; + + do + { + if (cli.argc == 1 + && !getline(std::cin, cmd)) // Wait for an input or an end-of-file (EOF) indication + cmd = "quit"; + + std::istringstream is(cmd); + + token.clear(); // Avoid a stale if getline() returns nothing or a blank line + is >> std::skipws >> token; + + if (token == "quit" || token == "stop") + engine.stop(); + + // The GUI sends 'ponderhit' to tell that the user has played the expected move. + // So, 'ponderhit' is sent if pondering was done on the same move that the user + // has played. The search should continue, but should also switch from pondering + // to the normal search. + else if (token == "ponderhit") + engine.set_ponderhit(false); + + else if (token == "uci") + sync_cout << "id name " << engine_info(true) << "\n" + << engine.get_options() << "\nuciok" << sync_endl; + + else if (token == "setoption") + setoption(is); + else if (token == "go") + go(is); + else if (token == "position") + position(is); + else if (token == "ucinewgame") + engine.search_clear(); + else if (token == "isready") + sync_cout << "readyok" << sync_endl; + + // Add custom non-UCI commands, mainly for debugging purposes. + // These commands must not be used during a search! + else if (token == "flip") + engine.flip(); + else if (token == "bench") + bench(is); + else if (token == "d") + sync_cout << engine.visualize() << sync_endl; + else if (token == "eval") + engine.trace_eval(); + else if (token == "compiler") + sync_cout << compiler_info() << sync_endl; + else if (token == "export_net") + { + std::pair, std::string> files[2]; + + if (is >> std::skipws >> files[0].second) + files[0].first = files[0].second; + + if (is >> std::skipws >> files[1].second) + files[1].first = files[1].second; + + engine.save_network(files); + } + else if (token == "--help" || token == "help" || token == "--license" || token == "license") + sync_cout + << "\nStockfish is a powerful chess engine for playing and analyzing." + "\nIt is released as free software licensed under the GNU GPLv3 License." + "\nStockfish is normally used with a graphical user interface (GUI) and implements" + "\nthe Universal Chess Interface (UCI) protocol to communicate with a GUI, an API, etc." + "\nFor any further information, visit https://github.com/official-stockfish/Stockfish#readme" + "\nor read the corresponding README.md and Copying.txt files distributed along with this program.\n" + << sync_endl; + else if (!token.empty() && token[0] != '#') + sync_cout << "Unknown command: '" << cmd << "'. Type help for more information." + << sync_endl; + + } while (token != "quit" && cli.argc == 1); // The command-line arguments are one-shot +} + +Search::LimitsType UCIEngine::parse_limits(std::istream& is) { + Search::LimitsType limits; + std::string token; + + limits.startTime = now(); // The search starts as early as possible + + while (is >> token) + if (token == "searchmoves") // Needs to be the last command on the line + while (is >> token) + limits.searchmoves.push_back(to_lower(token)); + + else if (token == "wtime") + is >> limits.time[WHITE]; + else if (token == "btime") + is >> limits.time[BLACK]; + else if (token == "winc") + is >> limits.inc[WHITE]; + else if (token == "binc") + is >> limits.inc[BLACK]; + else if (token == "movestogo") + is >> limits.movestogo; + else if (token == "depth") + is >> limits.depth; + else if (token == "nodes") + is >> limits.nodes; + else if (token == "movetime") + is >> limits.movetime; + else if (token == "mate") + is >> limits.mate; + else if (token == "perft") + is >> limits.perft; + else if (token == "infinite") + limits.infinite = 1; + else if (token == "ponder") + limits.ponderMode = true; + + return limits; +} + +void UCIEngine::go(std::istringstream& is) { + + Search::LimitsType limits = parse_limits(is); + + if (limits.perft) + perft(limits); + else + engine.go(limits); +} + +void UCIEngine::bench(std::istream& args) { + std::string token; + uint64_t num, nodes = 0, cnt = 1; + uint64_t nodesSearched = 0; + const auto& options = engine.get_options(); + + engine.set_on_update_full([&](const auto& i) { + nodesSearched = i.nodes; + on_update_full(i, options["UCI_ShowWDL"]); + }); + + std::vector list = Benchmark::setup_bench(engine.fen(), args); + + num = count_if(list.begin(), list.end(), + [](const std::string& s) { return s.find("go ") == 0 || s.find("eval") == 0; }); + + TimePoint elapsed = now(); + + for (const auto& cmd : list) + { + std::istringstream is(cmd); + is >> std::skipws >> token; + + if (token == "go" || token == "eval") + { + std::cerr << "\nPosition: " << cnt++ << '/' << num << " (" << engine.fen() << ")" + << std::endl; + if (token == "go") + { + Search::LimitsType limits = parse_limits(is); + + if (limits.perft) + nodes = perft(limits); + else + { + engine.go(limits); + engine.wait_for_search_finished(); + } + + nodes += nodesSearched; + nodesSearched = 0; + } + else + engine.trace_eval(); + } + else if (token == "setoption") + setoption(is); + else if (token == "position") + position(is); + else if (token == "ucinewgame") + { + engine.search_clear(); // search_clear may take a while + elapsed = now(); + } + } + + elapsed = now() - elapsed + 1; // Ensure positivity to avoid a 'divide by zero' + + dbg_print(); + + std::cerr << "\n===========================" + << "\nTotal time (ms) : " << elapsed << "\nNodes searched : " << nodes + << "\nNodes/second : " << 1000 * nodes / elapsed << std::endl; + + // reset callback, to not capture a dangling reference to nodesSearched + engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); +} + + +void UCIEngine::setoption(std::istringstream& is) { + engine.wait_for_search_finished(); + engine.get_options().setoption(is); +} + +std::uint64_t UCIEngine::perft(const Search::LimitsType& limits) { + auto nodes = engine.perft(engine.fen(), limits.perft, engine.get_options()["UCI_Chess960"]); + sync_cout << "\nNodes searched: " << nodes << "\n" << sync_endl; + return nodes; +} + +void UCIEngine::position(std::istringstream& is) { + std::string token, fen; is >> token; if (token == "startpos") { fen = StartFEN; - is >> token; // Consume "moves" token if any + is >> token; // Consume the "moves" token, if any } else if (token == "fen") while (is >> token && token != "moves") @@ -67,317 +322,181 @@ namespace { else return; - states = StateListPtr(new std::deque(1)); // Drop old and create a new one - pos.set(fen, Options["UCI_Chess960"], &states->back(), Threads.main()); - - // Parse move list (if any) - while (is >> token && (m = UCI::to_move(pos, token)) != MOVE_NONE) - { - states->emplace_back(); - pos.do_move(m, states->back()); - } - } - - // trace_eval() prints the evaluation for the current position, consistent with the UCI - // options set so far. - - void trace_eval(Position& pos) { - - StateListPtr states(new std::deque(1)); - Position p; - p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main()); - - Eval::NNUE::verify(); - - sync_cout << "\n" << Eval::trace(p) << sync_endl; - } - - - // setoption() is called when engine receives the "setoption" UCI command. The - // function updates the UCI option ("name") to the given value ("value"). - - void setoption(istringstream& is) { - - string token, name, value; - - is >> token; // Consume "name" token - - // Read option name (can contain spaces) - while (is >> token && token != "value") - name += (name.empty() ? "" : " ") + token; - - // Read option value (can contain spaces) - while (is >> token) - value += (value.empty() ? "" : " ") + token; - - if (Options.count(name)) - Options[name] = value; - else - sync_cout << "No such option: " << name << sync_endl; - } - - - // go() is called when engine receives the "go" UCI command. The function sets - // the thinking time and other parameters from the input string, then starts - // the search. - - void go(Position& pos, istringstream& is, StateListPtr& states) { - - Search::LimitsType limits; - string token; - bool ponderMode = false; - - limits.startTime = now(); // As early as possible! + std::vector moves; while (is >> token) - if (token == "searchmoves") // Needs to be the last command on the line - while (is >> token) - limits.searchmoves.push_back(UCI::to_move(pos, token)); - - else if (token == "wtime") is >> limits.time[WHITE]; - else if (token == "btime") is >> limits.time[BLACK]; - else if (token == "winc") is >> limits.inc[WHITE]; - else if (token == "binc") is >> limits.inc[BLACK]; - else if (token == "movestogo") is >> limits.movestogo; - else if (token == "depth") is >> limits.depth; - else if (token == "nodes") is >> limits.nodes; - else if (token == "movetime") is >> limits.movetime; - else if (token == "mate") is >> limits.mate; - else if (token == "perft") is >> limits.perft; - else if (token == "infinite") limits.infinite = 1; - else if (token == "ponder") ponderMode = true; - - Threads.start_thinking(pos, states, limits, ponderMode); - } - - - // bench() is called when engine receives the "bench" command. Firstly - // a list of UCI commands is setup according to bench parameters, then - // it is run one by one printing a summary at the end. - - void bench(Position& pos, istream& args, StateListPtr& states) { - - string token; - uint64_t num, nodes = 0, cnt = 1; - - vector list = setup_bench(pos, args); - num = count_if(list.begin(), list.end(), [](string s) { return s.find("go ") == 0 || s.find("eval") == 0; }); - - TimePoint elapsed = now(); - - for (const auto& cmd : list) { - istringstream is(cmd); - is >> skipws >> token; - - if (token == "go" || token == "eval") - { - cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl; - if (token == "go") - { - go(pos, is, states); - Threads.main()->wait_for_search_finished(); - nodes += Threads.nodes_searched(); - } - else - trace_eval(pos); - } - else if (token == "setoption") setoption(is); - else if (token == "position") position(pos, is, states); - else if (token == "ucinewgame") { Search::clear(); elapsed = now(); } // Search::clear() may take some while + moves.push_back(token); } - elapsed = now() - elapsed + 1; // Ensure positivity to avoid a 'divide by zero' + engine.set_position(fen, moves); +} - dbg_print(); // Just before exiting +namespace { - cerr << "\n===========================" - << "\nTotal time (ms) : " << elapsed - << "\nNodes searched : " << nodes - << "\nNodes/second : " << 1000 * nodes / elapsed << endl; - } +struct WinRateParams { + double a; + double b; +}; - // The win rate model returns the probability (per mille) of winning given an eval - // and a game-ply. The model fits rather accurately the LTC fishtest statistics. - int win_rate_model(Value v, int ply) { +WinRateParams win_rate_params(const Position& pos) { - // The model captures only up to 240 plies, so limit input (and rescale) - double m = std::min(240, ply) / 64.0; + int material = pos.count() + 3 * pos.count() + 3 * pos.count() + + 5 * pos.count() + 9 * pos.count(); - // Coefficients of a 3rd order polynomial fit based on fishtest data - // for two parameters needed to transform eval to the argument of a - // logistic function. - double as[] = {-3.68389304, 30.07065921, -60.52878723, 149.53378557}; - double bs[] = {-2.0181857, 15.85685038, -29.83452023, 47.59078827}; - double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; - double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; + // The fitted model only uses data for material counts in [10, 78], and is anchored at count 58. + double m = std::clamp(material, 10, 78) / 58.0; - // Transform eval to centipawns with limited range - double x = std::clamp(double(100 * v) / PawnValueEg, -2000.0, 2000.0); + // Return a = p_a(material) and b = p_b(material), see github.com/official-stockfish/WDL_model + constexpr double as[] = {-150.77043883, 394.96159472, -321.73403766, 406.15850091}; + constexpr double bs[] = {62.33245393, -91.02264855, 45.88486850, 51.63461272}; - // Return win rate in per mille (rounded to nearest) - return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); - } + double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; + double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; -} // namespace + return {a, b}; +} +// The win rate model is 1 / (1 + exp((a - eval) / b)), where a = p_a(material) and b = p_b(material). +// It fits the LTC fishtest statistics rather accurately. +int win_rate_model(Value v, const Position& pos) { -/// UCI::loop() waits for a command from stdin, parses it and calls the appropriate -/// function. Also intercepts EOF from stdin to ensure gracefully exiting if the -/// GUI dies unexpectedly. When called with some command line arguments, e.g. to -/// run 'bench', once the command is executed the function returns immediately. -/// In addition to the UCI ones, also some additional debug commands are supported. + auto [a, b] = win_rate_params(pos); -void UCI::loop(int argc, char* argv[]) { + // Return the win rate in per mille units, rounded to the nearest integer. + return int(0.5 + 1000 / (1 + std::exp((a - double(v)) / b))); +} +} - Position pos; - string token, cmd; - StateListPtr states(new std::deque(1)); +std::string UCIEngine::format_score(const Score& s) { + constexpr int TB_CP = 20000; + const auto format = + overload{[](Score::Mate mate) -> std::string { + auto m = (mate.plies > 0 ? (mate.plies + 1) : mate.plies) / 2; + return std::string("mate ") + std::to_string(m); + }, + [](Score::Tablebase tb) -> std::string { + return std::string("cp ") + + std::to_string((tb.win ? TB_CP - tb.plies : -TB_CP - tb.plies)); + }, + [](Score::InternalUnits units) -> std::string { + return std::string("cp ") + std::to_string(units.value); + }}; - pos.set(StartFEN, false, &states->back(), Threads.main()); + return s.visit(format); +} - for (int i = 1; i < argc; ++i) - cmd += std::string(argv[i]) + " "; +// Turns a Value to an integer centipawn number, +// without treatment of mate and similar special scores. +int UCIEngine::to_cp(Value v, const Position& pos) { - do { - if (argc == 1 && !getline(cin, cmd)) // Block here waiting for input or EOF - cmd = "quit"; + // In general, the score can be defined via the the WDL as + // (log(1/L - 1) - log(1/W - 1)) / ((log(1/L - 1) + log(1/W - 1)) + // Based on our win_rate_model, this simply yields v / a. - istringstream is(cmd); + auto [a, b] = win_rate_params(pos); - token.clear(); // Avoid a stale if getline() returns empty or blank line - is >> skipws >> token; + return std::round(100 * int(v) / a); +} - if ( token == "quit" - || token == "stop") - Threads.stop = true; +std::string UCIEngine::wdl(Value v, const Position& pos) { + std::stringstream ss; - // The GUI sends 'ponderhit' to tell us the user has played the expected move. - // So 'ponderhit' will be sent if we were told to ponder on the same move the - // user has played. We should continue searching but switch from pondering to - // normal search. - else if (token == "ponderhit") - Threads.main()->ponder = false; // Switch to normal search + int wdl_w = win_rate_model(v, pos); + int wdl_l = win_rate_model(-v, pos); + int wdl_d = 1000 - wdl_w - wdl_l; + ss << wdl_w << " " << wdl_d << " " << wdl_l; - else if (token == "uci") - sync_cout << "id name " << engine_info(true) - << "\n" << Options - << "\nuciok" << sync_endl; + return ss.str(); +} - else if (token == "setoption") setoption(is); - else if (token == "go") go(pos, is, states); - else if (token == "position") position(pos, is, states); - else if (token == "ucinewgame") Search::clear(); - else if (token == "isready") sync_cout << "readyok" << sync_endl; +std::string UCIEngine::square(Square s) { + return std::string{char('a' + file_of(s)), char('1' + rank_of(s))}; +} - // Additional custom non-UCI commands, mainly for debugging. - // Do not use these commands during a search! - else if (token == "flip") pos.flip(); - else if (token == "bench") bench(pos, is, states); - else if (token == "d") sync_cout << pos << sync_endl; - else if (token == "eval") trace_eval(pos); - else if (token == "compiler") sync_cout << compiler_info() << sync_endl; - else if (token == "export_net") - { - std::optional filename; - std::string f; - if (is >> skipws >> f) - filename = f; - Eval::NNUE::save_eval(filename); - } - else if (!token.empty() && token[0] != '#') - sync_cout << "Unknown command: " << cmd << sync_endl; +std::string UCIEngine::move(Move m, bool chess960) { + if (m == Move::none()) + return "(none)"; - } while (token != "quit" && argc == 1); // Command line args are one-shot + if (m == Move::null()) + return "0000"; + + Square from = m.from_sq(); + Square to = m.to_sq(); + + if (m.type_of() == CASTLING && !chess960) + to = make_square(to > from ? FILE_G : FILE_C, rank_of(from)); + + std::string move = square(from) + square(to); + + if (m.type_of() == PROMOTION) + move += " pnbrqk"[m.promotion_type()]; + + return move; } -/// UCI::value() converts a Value to a string suitable for use with the UCI -/// protocol specification: -/// -/// cp The score from the engine's point of view in centipawns. -/// mate Mate in y moves, not plies. If the engine is getting mated -/// use negative values for y. +std::string UCIEngine::to_lower(std::string str) { + std::transform(str.begin(), str.end(), str.begin(), [](auto c) { return std::tolower(c); }); -string UCI::value(Value v) { - - assert(-VALUE_INFINITE < v && v < VALUE_INFINITE); - - stringstream ss; - - if (abs(v) < VALUE_MATE_IN_MAX_PLY) - ss << "cp " << v * 100 / PawnValueEg; - else - ss << "mate " << (v > 0 ? VALUE_MATE - v + 1 : -VALUE_MATE - v) / 2; - - return ss.str(); + return str; } +Move UCIEngine::to_move(const Position& pos, std::string str) { + str = to_lower(str); -/// UCI::wdl() report WDL statistics given an evaluation and a game ply, based on -/// data gathered for fishtest LTC games. + for (const auto& m : MoveList(pos)) + if (str == move(m, pos.is_chess960())) + return m; -string UCI::wdl(Value v, int ply) { - - stringstream ss; - - int wdl_w = win_rate_model( v, ply); - int wdl_l = win_rate_model(-v, ply); - int wdl_d = 1000 - wdl_w - wdl_l; - ss << " wdl " << wdl_w << " " << wdl_d << " " << wdl_l; - - return ss.str(); + return Move::none(); } - -/// UCI::square() converts a Square to a string in algebraic notation (g1, a7, etc.) - -std::string UCI::square(Square s) { - return std::string{ char('a' + file_of(s)), char('1' + rank_of(s)) }; +void UCIEngine::on_update_no_moves(const Engine::InfoShort& info) { + sync_cout << "info depth " << info.depth << " score " << format_score(info.score) << sync_endl; } +void UCIEngine::on_update_full(const Engine::InfoFull& info, bool showWDL) { + std::stringstream ss; -/// UCI::move() converts a Move to a string in coordinate notation (g1f3, a7a8q). -/// The only special case is castling, where we print in the e1g1 notation in -/// normal chess mode, and in e1h1 notation in chess960 mode. Internally all -/// castling moves are always encoded as 'king captures rook'. + ss << "info"; + ss << " depth " << info.depth // + << " seldepth " << info.selDepth // + << " multipv " << info.multiPV // + << " score " << format_score(info.score); // -string UCI::move(Move m, bool chess960) { + if (showWDL) + ss << " wdl " << info.wdl; - Square from = from_sq(m); - Square to = to_sq(m); + if (!info.bound.empty()) + ss << " " << info.bound; - if (m == MOVE_NONE) - return "(none)"; + ss << " nodes " << info.nodes // + << " nps " << info.nps // + << " hashfull " << info.hashfull // + << " tbhits " << info.tbHits // + << " time " << info.timeMs // + << " pv " << info.pv; // - if (m == MOVE_NULL) - return "0000"; - - if (type_of(m) == CASTLING && !chess960) - to = make_square(to > from ? FILE_G : FILE_C, rank_of(from)); - - string move = UCI::square(from) + UCI::square(to); - - if (type_of(m) == PROMOTION) - move += " pnbrqk"[promotion_type(m)]; - - return move; + sync_cout << ss.str() << sync_endl; } +void UCIEngine::on_iter(const Engine::InfoIter& info) { + std::stringstream ss; -/// UCI::to_move() converts a string representing a move in coordinate notation -/// (g1f3, a7a8q) to the corresponding legal Move, if any. + ss << "info"; + ss << " depth " << info.depth // + << " currmove " << info.currmove // + << " currmovenumber " << info.currmovenumber; // -Move UCI::to_move(const Position& pos, string& str) { - - if (str.length() == 5) // Junior could send promotion piece in uppercase - str[4] = char(tolower(str[4])); - - for (const auto& m : MoveList(pos)) - if (str == UCI::move(m, pos.is_chess960())) - return m; - - return MOVE_NONE; + sync_cout << ss.str() << sync_endl; } -} // namespace Stockfish +void UCIEngine::on_bestmove(std::string_view bestmove, std::string_view ponder) { + sync_cout << "bestmove " << bestmove; + if (!ponder.empty()) + std::cout << " ponder " << ponder; + std::cout << sync_endl; +} + +} // namespace Stockfish diff --git a/src/uci.h b/src/uci.h index 5bb24a4e..55d580f9 100644 --- a/src/uci.h +++ b/src/uci.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,67 +19,57 @@ #ifndef UCI_H_INCLUDED #define UCI_H_INCLUDED -#include +#include #include +#include +#include -#include "types.h" +#include "engine.h" +#include "misc.h" +#include "search.h" namespace Stockfish { class Position; +class Move; +class Score; +enum Square : int; +using Value = int; -namespace UCI { +class UCIEngine { + public: + UCIEngine(int argc, char** argv); -class Option; + void loop(); -/// Custom comparator because UCI options should be case insensitive -struct CaseInsensitiveLess { - bool operator() (const std::string&, const std::string&) const; + static int to_cp(Value v, const Position& pos); + static std::string format_score(const Score& s); + static std::string square(Square s); + static std::string move(Move m, bool chess960); + static std::string wdl(Value v, const Position& pos); + static std::string to_lower(std::string str); + static Move to_move(const Position& pos, std::string str); + + static Search::LimitsType parse_limits(std::istream& is); + + auto& engine_options() { return engine.get_options(); } + + private: + Engine engine; + CommandLine cli; + + void go(std::istringstream& is); + void bench(std::istream& args); + void position(std::istringstream& is); + void setoption(std::istringstream& is); + std::uint64_t perft(const Search::LimitsType&); + + static void on_update_no_moves(const Engine::InfoShort& info); + static void on_update_full(const Engine::InfoFull& info, bool showWDL); + static void on_iter(const Engine::InfoIter& info); + static void on_bestmove(std::string_view bestmove, std::string_view ponder); }; -/// Our options container is actually a std::map -typedef std::map OptionsMap; +} // namespace Stockfish -/// Option class implements an option as defined by UCI protocol -class Option { - - typedef void (*OnChange)(const Option&); - -public: - Option(OnChange = nullptr); - Option(bool v, OnChange = nullptr); - Option(const char* v, OnChange = nullptr); - Option(double v, int minv, int maxv, OnChange = nullptr); - Option(const char* v, const char* cur, OnChange = nullptr); - - Option& operator=(const std::string&); - void operator<<(const Option&); - operator double() const; - operator std::string() const; - bool operator==(const char*) const; - -private: - friend std::ostream& operator<<(std::ostream&, const OptionsMap&); - - std::string defaultValue, currentValue, type; - int min, max; - size_t idx; - OnChange on_change; -}; - -void init(OptionsMap&); -void loop(int argc, char* argv[]); -std::string value(Value v); -std::string square(Square s); -std::string move(Move m, bool chess960); -std::string pv(const Position& pos, Depth depth, Value alpha, Value beta); -std::string wdl(Value v, int ply); -Move to_move(const Position& pos, std::string& str); - -} // namespace UCI - -extern UCI::OptionsMap Options; - -} // namespace Stockfish - -#endif // #ifndef UCI_H_INCLUDED +#endif // #ifndef UCI_H_INCLUDED diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 922fa34f..e1ffe546 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,179 +16,170 @@ along with this program. If not, see . */ +#include "ucioption.h" + #include #include -#include +#include +#include #include +#include -#include "evaluate.h" #include "misc.h" -#include "search.h" -#include "thread.h" -#include "tt.h" -#include "uci.h" -#include "syzygy/tbprobe.h" - -using std::string; namespace Stockfish { -UCI::OptionsMap Options; // Global object +bool CaseInsensitiveLess::operator()(const std::string& s1, const std::string& s2) const { -namespace UCI { - -/// 'On change' actions, triggered by an option's value change -void on_clear_hash(const Option&) { Search::clear(); } -void on_hash_size(const Option& o) { TT.resize(size_t(o)); } -void on_logger(const Option& o) { start_logger(o); } -void on_threads(const Option& o) { Threads.set(size_t(o)); } -void on_tb_path(const Option& o) { Tablebases::init(o); } -void on_use_NNUE(const Option& ) { Eval::NNUE::init(); } -void on_eval_file(const Option& ) { Eval::NNUE::init(); } - -/// Our case insensitive less() function as required by UCI protocol -bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const { - - return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), s2.end(), - [](char c1, char c2) { return tolower(c1) < tolower(c2); }); + return std::lexicographical_compare( + s1.begin(), s1.end(), s2.begin(), s2.end(), + [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); }); } +void OptionsMap::setoption(std::istringstream& is) { + std::string token, name, value; -/// UCI::init() initializes the UCI options to their hard-coded default values + is >> token; // Consume the "name" token -void init(OptionsMap& o) { + // Read the option name (can contain spaces) + while (is >> token && token != "value") + name += (name.empty() ? "" : " ") + token; - constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; + // Read the option value (can contain spaces) + while (is >> token) + value += (value.empty() ? "" : " ") + token; - o["Debug Log File"] << Option("", on_logger); - o["Threads"] << Option(1, 1, 512, on_threads); - o["Hash"] << Option(16, 1, MaxHashMB, on_hash_size); - o["Clear Hash"] << Option(on_clear_hash); - o["Ponder"] << Option(false); - o["MultiPV"] << Option(1, 1, 500); - o["Skill Level"] << Option(20, 0, 20); - o["Move Overhead"] << Option(10, 0, 5000); - o["Slow Mover"] << Option(100, 10, 1000); - o["nodestime"] << Option(0, 0, 10000); - o["UCI_Chess960"] << Option(false); - o["UCI_AnalyseMode"] << Option(false); - o["UCI_LimitStrength"] << Option(false); - o["UCI_Elo"] << Option(1350, 1350, 2850); - o["UCI_ShowWDL"] << Option(false); - o["SyzygyPath"] << Option("", on_tb_path); - o["SyzygyProbeDepth"] << Option(1, 1, 100); - o["Syzygy50MoveRule"] << Option(true); - o["SyzygyProbeLimit"] << Option(7, 0, 7); - o["Use NNUE"] << Option(true, on_use_NNUE); - o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); + if (options_map.count(name)) + options_map[name] = value; + else + sync_cout << "No such option: " << name << sync_endl; } - -/// operator<<() is used to print all the options default values in chronological -/// insertion order (the idx field) and in the format defined by the UCI protocol. - -std::ostream& operator<<(std::ostream& os, const OptionsMap& om) { - - for (size_t idx = 0; idx < om.size(); ++idx) - for (const auto& it : om) - if (it.second.idx == idx) - { - const Option& o = it.second; - os << "\noption name " << it.first << " type " << o.type; - - if (o.type == "string" || o.type == "check" || o.type == "combo") - os << " default " << o.defaultValue; - - if (o.type == "spin") - os << " default " << int(stof(o.defaultValue)) - << " min " << o.min - << " max " << o.max; - - break; - } - - return os; +Option OptionsMap::operator[](const std::string& name) const { + auto it = options_map.find(name); + return it != options_map.end() ? it->second : Option(); } +Option& OptionsMap::operator[](const std::string& name) { return options_map[name]; } -/// Option class constructors and conversion operators +std::size_t OptionsMap::count(const std::string& name) const { return options_map.count(name); } -Option::Option(const char* v, OnChange f) : type("string"), min(0), max(0), on_change(f) -{ defaultValue = currentValue = v; } +Option::Option(const char* v, OnChange f) : + type("string"), + min(0), + max(0), + on_change(std::move(f)) { + defaultValue = currentValue = v; +} -Option::Option(bool v, OnChange f) : type("check"), min(0), max(0), on_change(f) -{ defaultValue = currentValue = (v ? "true" : "false"); } +Option::Option(bool v, OnChange f) : + type("check"), + min(0), + max(0), + on_change(std::move(f)) { + defaultValue = currentValue = (v ? "true" : "false"); +} -Option::Option(OnChange f) : type("button"), min(0), max(0), on_change(f) -{} +Option::Option(OnChange f) : + type("button"), + min(0), + max(0), + on_change(std::move(f)) {} -Option::Option(double v, int minv, int maxv, OnChange f) : type("spin"), min(minv), max(maxv), on_change(f) -{ defaultValue = currentValue = std::to_string(v); } +Option::Option(double v, int minv, int maxv, OnChange f) : + type("spin"), + min(minv), + max(maxv), + on_change(std::move(f)) { + defaultValue = currentValue = std::to_string(v); +} -Option::Option(const char* v, const char* cur, OnChange f) : type("combo"), min(0), max(0), on_change(f) -{ defaultValue = v; currentValue = cur; } +Option::Option(const char* v, const char* cur, OnChange f) : + type("combo"), + min(0), + max(0), + on_change(std::move(f)) { + defaultValue = v; + currentValue = cur; +} -Option::operator double() const { - assert(type == "check" || type == "spin"); - return (type == "spin" ? stof(currentValue) : currentValue == "true"); +Option::operator int() const { + assert(type == "check" || type == "spin"); + return (type == "spin" ? std::stoi(currentValue) : currentValue == "true"); } Option::operator std::string() const { - assert(type == "string"); - return currentValue; + assert(type == "string"); + return currentValue; } bool Option::operator==(const char* s) const { - assert(type == "combo"); - return !CaseInsensitiveLess()(currentValue, s) - && !CaseInsensitiveLess()(s, currentValue); + assert(type == "combo"); + return !CaseInsensitiveLess()(currentValue, s) && !CaseInsensitiveLess()(s, currentValue); } -/// operator<<() inits options and assigns idx in the correct printing order +// Inits options and assigns idx in the correct printing order void Option::operator<<(const Option& o) { - static size_t insert_order = 0; + static size_t insert_order = 0; - *this = o; - idx = insert_order++; + *this = o; + idx = insert_order++; } -/// operator=() updates currentValue and triggers on_change() action. It's up to -/// the GUI to check for option's limits, but we could receive the new value -/// from the user by console window, so let's check the bounds anyway. +// Updates currentValue and triggers on_change() action. It's up to +// the GUI to check for option's limits, but we could receive the new value +// from the user by console window, so let's check the bounds anyway. +Option& Option::operator=(const std::string& v) { -Option& Option::operator=(const string& v) { + assert(!type.empty()); - assert(!type.empty()); + if ((type != "button" && type != "string" && v.empty()) + || (type == "check" && v != "true" && v != "false") + || (type == "spin" && (std::stof(v) < min || std::stof(v) > max))) + return *this; - if ( (type != "button" && type != "string" && v.empty()) - || (type == "check" && v != "true" && v != "false") - || (type == "spin" && (stof(v) < min || stof(v) > max))) - return *this; + if (type == "combo") + { + OptionsMap comboMap; // To have case insensitive compare + std::string token; + std::istringstream ss(defaultValue); + while (ss >> token) + comboMap[token] << Option(); + if (!comboMap.count(v) || v == "var") + return *this; + } - if (type == "combo") - { - OptionsMap comboMap; // To have case insensitive compare - string token; - std::istringstream ss(defaultValue); - while (ss >> token) - comboMap[token] << Option(); - if (!comboMap.count(v) || v == "var") - return *this; - } + if (type != "button") + currentValue = v; - if (type != "button") - currentValue = v; + if (on_change) + on_change(*this); - if (on_change) - on_change(*this); - - return *this; + return *this; } -} // namespace UCI +std::ostream& operator<<(std::ostream& os, const OptionsMap& om) { + for (size_t idx = 0; idx < om.options_map.size(); ++idx) + for (const auto& it : om.options_map) + if (it.second.idx == idx) + { + const Option& o = it.second; + os << "\noption name " << it.first << " type " << o.type; -} // namespace Stockfish + if (o.type == "string" || o.type == "check" || o.type == "combo") + os << " default " << o.defaultValue; + + if (o.type == "spin") + os << " default " << int(stof(o.defaultValue)) << " min " << o.min << " max " + << o.max; + + break; + } + + return os; +} +} diff --git a/src/ucioption.h b/src/ucioption.h new file mode 100644 index 00000000..b575d164 --- /dev/null +++ b/src/ucioption.h @@ -0,0 +1,81 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef UCIOPTION_H_INCLUDED +#define UCIOPTION_H_INCLUDED + +#include +#include +#include +#include +#include + +namespace Stockfish { +// Define a custom comparator, because the UCI options should be case-insensitive +struct CaseInsensitiveLess { + bool operator()(const std::string&, const std::string&) const; +}; + +class Option; + +class OptionsMap { + public: + void setoption(std::istringstream&); + + friend std::ostream& operator<<(std::ostream&, const OptionsMap&); + + Option operator[](const std::string&) const; + Option& operator[](const std::string&); + + std::size_t count(const std::string&) const; + + private: + // The options container is defined as a std::map + using OptionsStore = std::map; + + OptionsStore options_map; +}; + +// The Option class implements each option as specified by the UCI protocol +class Option { + public: + using OnChange = std::function; + + Option(OnChange = nullptr); + Option(bool v, OnChange = nullptr); + Option(const char* v, OnChange = nullptr); + Option(double v, int minv, int maxv, OnChange = nullptr); + Option(const char* v, const char* cur, OnChange = nullptr); + + Option& operator=(const std::string&); + void operator<<(const Option&); + operator int() const; + operator std::string() const; + bool operator==(const char*) const; + + friend std::ostream& operator<<(std::ostream&, const OptionsMap&); + + private: + std::string defaultValue, currentValue, type; + int min, max; + size_t idx; + OnChange on_change; +}; + +} +#endif // #ifndef UCIOPTION_H_INCLUDED diff --git a/tests/instrumented.sh b/tests/instrumented.sh index e9455eab..4c63fc57 100755 --- a/tests/instrumented.sh +++ b/tests/instrumented.sh @@ -1,5 +1,5 @@ #!/bin/bash -# check for errors under valgrind or sanitizers. +# check for errors under Valgrind or sanitizers. error() { @@ -14,14 +14,14 @@ case $1 in echo "valgrind testing started" prefix='' exeprefix='valgrind --error-exitcode=42 --errors-for-leak-kinds=all --leak-check=full' - postfix='1>/dev/null' + postfix='' threads="1" ;; --valgrind-thread) echo "valgrind-thread testing started" prefix='' exeprefix='valgrind --fair-sched=try --error-exitcode=42' - postfix='1>/dev/null' + postfix='' threads="2" ;; --sanitizer-undefined) @@ -64,13 +64,32 @@ EOF ;; esac +cat << EOF > bench_tmp.epd +Rn6/1rbq1bk1/2p2n1p/2Bp1p2/3Pp1pP/1N2P1P1/2Q1NPB1/6K1 w - - 2 26 +rnbqkb1r/ppp1pp2/5n1p/3p2p1/P2PP3/5P2/1PP3PP/RNBQKBNR w KQkq - 0 3 +3qnrk1/4bp1p/1p2p1pP/p2bN3/1P1P1B2/P2BQ3/5PP1/4R1K1 w - - 9 28 +r4rk1/1b2ppbp/pq4pn/2pp1PB1/1p2P3/1P1P1NN1/1PP3PP/R2Q1RK1 w - - 0 13 +EOF + # simple command line testing for args in "eval" \ "go nodes 1000" \ "go depth 10" \ + "go perft 4" \ "go movetime 1000" \ "go wtime 8000 btime 8000 winc 500 binc 500" \ - "bench 128 $threads 8 default depth" + "go wtime 1000 btime 1000 winc 0 binc 0" \ + "go wtime 1000 btime 1000 winc 0 binc 0" \ + "go wtime 1000 btime 1000 winc 0 binc 0 movestogo 5" \ + "go movetime 200" \ + "go nodes 20000 searchmoves e2e4 d2d4" \ + "bench 128 $threads 8 default depth" \ + "bench 128 $threads 3 bench_tmp.epd depth" \ + "export_net verify.nnue" \ + "d" \ + "compiler" \ + "license" \ + "uci" do echo "$prefix $exeprefix ./stockfish $args $postfix" @@ -78,14 +97,25 @@ do done +# verify the generated net equals the base net +network=`./stockfish uci | grep 'option name EvalFile type string default' | awk '{print $NF}'` +echo "Comparing $network to the written verify.nnue" +diff $network verify.nnue + # more general testing, following an uci protocol exchange cat << EOF > game.exp set timeout 240 + # to correctly catch eof we need the following line + # expect_before timeout { exit 2 } eof { exit 3 } + expect_before timeout { exit 2 } + spawn $exeprefix ./stockfish + expect "Stockfish" send "uci\n" expect "uciok" + # send "setoption name Debug Log File value debug.log\n" send "setoption name Threads value $threads\n" send "ucinewgame\n" @@ -93,18 +123,121 @@ cat << EOF > game.exp send "go nodes 1000\n" expect "bestmove" + send "ucinewgame\n" send "position startpos moves e2e4 e7e6\n" send "go nodes 1000\n" expect "bestmove" + send "ucinewgame\n" send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n" send "go depth 10\n" expect "bestmove" + send "ucinewgame\n" + send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n" + send "flip\n" + send "go depth 10\n" + expect "bestmove" + + send "ucinewgame\n" + send "position startpos\n" + send "go depth 5\n" + expect -re {info depth \d+ seldepth \d+ multipv \d+ score cp \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect "bestmove" + + send "ucinewgame\n" + send "setoption name UCI_ShowWDL value true\n" + send "position startpos\n" + send "go depth 9\n" + expect -re {info depth 1 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect -re {info depth 2 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect -re {info depth 3 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect -re {info depth 4 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect -re {info depth 5 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect -re {info depth 6 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect -re {info depth 7 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect -re {info depth 8 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect -re {info depth 9 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv} + expect "bestmove" + + send "setoption name Clear Hash\n" + + send "ucinewgame\n" + send "position fen 5K2/8/2qk4/2nPp3/3r4/6B1/B7/3R4 w - e6\n" + send "go depth 18\n" + expect "score mate 1" + expect "pv d5e6" + expect "bestmove d5e6" + + send "ucinewgame\n" + send "position fen 2brrb2/8/p7/Q7/1p1kpPp1/1P1pN1K1/3P4/8 b - -\n" + send "go depth 18\n" + expect "score mate -1" + expect "bestmove" + + send "ucinewgame\n" + send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" + send "go depth 18\n" + expect "score mate 2 * pv c6d7 * f7f5" + expect "bestmove c6d7" + + send "ucinewgame\n" + send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" + send "go mate 2\n" + expect "score mate 2 * pv c6d7" + expect "bestmove c6d7" + + send "ucinewgame\n" + send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" + send "go nodes 10000\n" + expect "score mate 2 * pv c6d7 * f7f5" + expect "bestmove c6d7" + + send "ucinewgame\n" + send "position fen 1NR2B2/5p2/5p2/1p1kpp2/1P2rp2/2P1pB2/2P1P1K1/8 b - - \n" + send "go depth 18\n" + expect "score mate -2" + expect "pv d5e6 c8d8" + expect "bestmove d5e6" + + send "ucinewgame\n" + send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7 f2f1q\n" + send "go depth 18\n" + expect "score mate 1 * pv f7f5" + expect "bestmove f7f5" + + send "ucinewgame\n" + send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n" + send "go depth 18 searchmoves c6d7\n" + expect "score mate 2 * pv c6d7 * f7f5" + expect "bestmove c6d7" + + send "ucinewgame\n" + send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7\n" + send "go depth 18 searchmoves e3e2\n" + expect "score mate -1 * pv e3e2 f7f5" + expect "bestmove e3e2" + + send "setoption name EvalFile value verify.nnue\n" + send "position startpos\n" + send "go depth 5\n" + expect "bestmove" + + send "setoption name MultiPV value 4\n" + send "position startpos\n" + send "go depth 5\n" + expect "bestmove" + + send "setoption name Skill Level value 10\n" + send "position startpos\n" + send "go depth 5\n" + expect "bestmove" + send "setoption name Skill Level value 20\n" + send "quit\n" expect eof - # return error code of the spawned program, useful for valgrind + # return error code of the spawned program, useful for Valgrind lassign [wait] pid spawnid os_error_flag value exit \$value EOF @@ -117,15 +250,35 @@ fi cat << EOF > syzygy.exp set timeout 240 + # to correctly catch eof we need the following line + # expect_before timeout { exit 2 } eof { exit 3 } + expect_before timeout { exit 2 } spawn $exeprefix ./stockfish + expect "Stockfish" send "uci\n" send "setoption name SyzygyPath value ../tests/syzygy/\n" - expect "info string Found 35 tablebases" {} timeout {exit 1} + expect "info string Found 35 tablebases" send "bench 128 1 8 default depth\n" + expect "Nodes searched :" + send "ucinewgame\n" + send "position fen 4k3/PP6/8/8/8/8/8/4K3 w - - 0 1\n" + send "go depth 5\n" + expect -re {score cp 20000|score mate} + expect "bestmove" + send "ucinewgame\n" + send "position fen 8/1P6/2B5/8/4K3/8/6k1/8 w - - 0 1\n" + send "go depth 5\n" + expect -re {score cp 20000|score mate} + expect "bestmove" + send "ucinewgame\n" + send "position fen 8/1P6/2B5/8/4K3/8/6k1/8 b - - 0 1\n" + send "go depth 5\n" + expect -re {score cp -20000|score mate} + expect "bestmove" send "quit\n" expect eof - # return error code of the spawned program, useful for valgrind + # return error code of the spawned program, useful for Valgrind lassign [wait] pid spawnid os_error_flag value exit \$value EOF @@ -133,6 +286,9 @@ EOF for exp in game.exp syzygy.exp do + echo "======== $exp ==============" + cat $exp + echo "============================" echo "$prefix expect $exp $postfix" eval "$prefix expect $exp $postfix" @@ -140,6 +296,6 @@ do done -rm -f tsan.supp +rm -f tsan.supp bench_tmp.epd echo "instrumented testing OK" diff --git a/tests/signature.sh b/tests/signature.sh index 2e5c183a..06bd1892 100755 --- a/tests/signature.sh +++ b/tests/signature.sh @@ -11,7 +11,7 @@ trap 'error ${LINENO}' ERR # obtain -signature=`./stockfish bench 2>&1 | grep "Nodes searched : " | awk '{print $4}'` +signature=`eval "$WINE_PATH ./stockfish bench 2>&1" | grep "Nodes searched : " | awk '{print $4}'` if [ $# -gt 0 ]; then # compare to given reference