Accumulator cache bugfix and cleanup

STC: https://tests.stockfishchess.org/tests/view/663068913a05f1bf7a511dc2
LLR: 2.98 (-2.94,2.94) <-1.75,0.25>
Total: 70304 W: 18211 L: 18026 D: 34067
Ptnml(0-2): 232, 7966, 18582, 8129, 243

1) Fixes a bug introduced in https://github.com/official-stockfish/Stockfish/pull/5194. Only one psqtOnly flag was used for the two perspectives, which caused the wrong entries to be cleared and marked.

2) The finny caches should be cleared the same way the histories are, not at the start of every search.

closes https://github.com/official-stockfish/Stockfish/pull/5203

No functional change
2025-04-30 00:33:09 +00:00 · 2024-04-29 20:37:54 -07:00 · 2024-04-29 20:37:54 -07:00 · be142337d8
commit be142337d8
parent 6a9b8a0c7b
3 changed files with 28 additions and 35 deletions
--- a/src/nnue/nnue_accumulator.h
+++ b/src/nnue/nnue_accumulator.h
@ -59,31 +59,27 @@ struct AccumulatorCaches {
    struct alignas(CacheLineSize) Cache {
        struct alignas(CacheLineSize) Entry {
-            BiasType       accumulation[COLOR_NB][Size];
+            BiasType       accumulation[Size];
-            PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets];
+            PSQTWeightType psqtAccumulation[PSQTBuckets];
-            Bitboard       byColorBB[COLOR_NB][COLOR_NB];
+            Bitboard       byColorBB[COLOR_NB];
-            Bitboard       byTypeBB[COLOR_NB][PIECE_TYPE_NB];
+            Bitboard       byTypeBB[PIECE_TYPE_NB];
            bool           psqtOnly;
            // To initialize a refresh entry, we set all its bitboards empty,
            // so we put the biases in the accumulation, without any weights on top
            void clear(const BiasType* biases) {
-                std::memset(byColorBB, 0, sizeof(byColorBB));
+                std::memcpy(accumulation, biases, sizeof(accumulation));
-                std::memset(byTypeBB, 0, sizeof(byTypeBB));
+                std::memset((uint8_t*) this + offsetof(Entry, psqtAccumulation), 0,
-                psqtOnly = false;
+                            sizeof(Entry) - offsetof(Entry, psqtAccumulation));
-                std::memcpy(accumulation[WHITE], biases, Size * sizeof(BiasType));
-                std::memcpy(accumulation[BLACK], biases, Size * sizeof(BiasType));
-                std::memset(psqtAccumulation, 0, sizeof(psqtAccumulation));
            }
        };
        template<typename Network>
        void clear(const Network& network) {
-            for (auto& entry : entries)
+            for (auto& entries1D : entries)
-                entry.clear(network.featureTransformer->biases);
+                for (auto& entry : entries1D)
+                    entry.clear(network.featureTransformer->biases);
        }
        void clear(const BiasType* biases) {
@ -91,9 +87,9 @@ struct AccumulatorCaches {
                entry.clear(biases);
        }
-        Entry& operator[](Square sq) { return entries[sq]; }
+        std::array<Entry, COLOR_NB>& operator[](Square sq) { return entries[sq]; }
-        std::array<Entry, SQUARE_NB> entries;
+        std::array<std::array<Entry, COLOR_NB>, SQUARE_NB> entries;
    };
    template<typename Networks>
--- a/src/nnue/nnue_feature_transformer.h
+++ b/src/nnue/nnue_feature_transformer.h
@ -652,7 +652,7 @@ class FeatureTransformer {
        assert(cache != nullptr);
        Square                ksq   = pos.square<KING>(Perspective);
-        auto&                 entry = (*cache)[ksq];
+        auto&                 entry = (*cache)[ksq][Perspective];
        FeatureSet::IndexList removed, added;
        if (entry.psqtOnly && !psqtOnly)
@ -666,9 +666,8 @@ class FeatureTransformer {
            {
                for (PieceType pt = PAWN; pt <= KING; ++pt)
                {
-                    const Piece    piece = make_piece(c, pt);
+                    const Piece    piece    = make_piece(c, pt);
-                    const Bitboard oldBB =
+                    const Bitboard oldBB    = entry.byColorBB[c] & entry.byTypeBB[pt];
-                      entry.byColorBB[Perspective][c] & entry.byTypeBB[Perspective][pt];
                    const Bitboard newBB    = pos.pieces(c, pt);
                    Bitboard       toRemove = oldBB & ~newBB;
                    Bitboard       toAdd    = newBB & ~oldBB;
@ -698,8 +697,7 @@ class FeatureTransformer {
        if (!psqtOnly)
            for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
            {
-                auto entryTile =
+                auto entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]);
-                  reinterpret_cast<vec_t*>(&entry.accumulation[Perspective][j * TileHeight]);
                for (IndexType k = 0; k < NumRegs; ++k)
                    acc[k] = entryTile[k];
@ -741,8 +739,8 @@ class FeatureTransformer {
        for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
        {
-            auto entryTilePsqt = reinterpret_cast<psqt_vec_t*>(
+            auto entryTilePsqt =
-              &entry.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+              reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * PsqtTileHeight]);
            for (std::size_t k = 0; k < NumPsqtRegs; ++k)
                psqt[k] = entryTilePsqt[k];
@ -777,11 +775,11 @@ class FeatureTransformer {
            {
                const IndexType offset = HalfDimensions * index;
                for (IndexType j = 0; j < HalfDimensions; ++j)
-                    entry.accumulation[Perspective][j] -= weights[offset + j];
+                    entry.accumulation[j] -= weights[offset + j];
            }
            for (std::size_t k = 0; k < PSQTBuckets; ++k)
-                entry.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
+                entry.psqtAccumulation[k] -= psqtWeights[index * PSQTBuckets + k];
        }
        for (const auto index : added)
        {
@ -789,11 +787,11 @@ class FeatureTransformer {
            {
                const IndexType offset = HalfDimensions * index;
                for (IndexType j = 0; j < HalfDimensions; ++j)
-                    entry.accumulation[Perspective][j] += weights[offset + j];
+                    entry.accumulation[j] += weights[offset + j];
            }
            for (std::size_t k = 0; k < PSQTBuckets; ++k)
-                entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
+                entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k];
        }
 #endif
@ -802,17 +800,17 @@ class FeatureTransformer {
        // Now copy its content to the actual accumulator we were refreshing
        if (!psqtOnly)
-            std::memcpy(accumulator.accumulation[Perspective], entry.accumulation[Perspective],
+            std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
                        sizeof(BiasType) * HalfDimensions);
-        std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation[Perspective],
+        std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
                    sizeof(int32_t) * PSQTBuckets);
        for (Color c : {WHITE, BLACK})
-            entry.byColorBB[Perspective][c] = pos.pieces(c);
+            entry.byColorBB[c] = pos.pieces(c);
        for (PieceType pt = PAWN; pt <= KING; ++pt)
-            entry.byTypeBB[Perspective][pt] = pos.pieces(pt);
+            entry.byTypeBB[pt] = pos.pieces(pt);
        entry.psqtOnly = psqtOnly;
    }
--- a/src/search.cpp
+++ b/src/search.cpp
@ -147,9 +147,6 @@ Search::Worker::Worker(SharedState&                    sharedState,
 void Search::Worker::start_searching() {
-    // Initialize accumulator refresh entries
-    refreshTable.clear(networks);
    // Non-main threads go directly to iterative_deepening()
    if (!is_mainthread())
    {
@ -506,6 +503,8 @@ void Search::Worker::clear() {
    for (size_t i = 1; i < reductions.size(); ++i)
        reductions[i] = int((20.14 + std::log(size_t(options["Threads"])) / 2) * std::log(i));
+    refreshTable.clear(networks);
 }