diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h index 26198114..975824b6 100644 --- a/src/nnue/features/feature_set.h +++ b/src/nnue/features/feature_set.h @@ -43,90 +43,6 @@ namespace Eval::NNUE::Features { template class FeatureSetBase { - public: - // Get a list of indices for active features - template - static void AppendActiveIndices( - const Position& pos, TriggerEvent trigger, IndexListType active[2]) { - - for (Color perspective : { WHITE, BLACK }) { - Derived::CollectActiveIndices( - pos, trigger, perspective, &active[perspective]); - } - } - - // Get a list of indices for recently changed features - template - static void AppendChangedIndices( - const PositionType& pos, TriggerEvent trigger, - IndexListType removed[2], IndexListType added[2], bool reset[2]) { - - auto collect_for_one = [&](const DirtyPiece& dp) { - for (Color perspective : { WHITE, BLACK }) { - switch (trigger) { - case TriggerEvent::kFriendKingMoved: - reset[perspective] = dp.piece[0] == make_piece(perspective, KING); - break; - default: - assert(false); - break; - } - if (reset[perspective]) { - Derived::CollectActiveIndices( - pos, trigger, perspective, &added[perspective]); - } else { - Derived::CollectChangedIndices( - pos, dp, trigger, perspective, - &removed[perspective], &added[perspective]); - } - } - }; - - auto collect_for_two = [&](const DirtyPiece& dp1, const DirtyPiece& dp2) { - for (Color perspective : { WHITE, BLACK }) { - switch (trigger) { - case TriggerEvent::kFriendKingMoved: - reset[perspective] = dp1.piece[0] == make_piece(perspective, KING) - || dp2.piece[0] == make_piece(perspective, KING); - break; - default: - assert(false); - break; - } - if (reset[perspective]) { - Derived::CollectActiveIndices( - pos, trigger, perspective, &added[perspective]); - } else { - Derived::CollectChangedIndices( - pos, dp1, trigger, perspective, - &removed[perspective], &added[perspective]); - Derived::CollectChangedIndices( - pos, dp2, trigger, perspective, - &removed[perspective], &added[perspective]); - } - } - }; - - if (pos.state()->previous->accumulator.computed_accumulation) { - const auto& prev_dp = pos.state()->dirtyPiece; - if (prev_dp.dirty_num == 0) return; - collect_for_one(prev_dp); - } else { - const auto& prev_dp = pos.state()->previous->dirtyPiece; - if (prev_dp.dirty_num == 0) { - const auto& prev2_dp = pos.state()->dirtyPiece; - if (prev2_dp.dirty_num == 0) return; - collect_for_one(prev2_dp); - } else { - const auto& prev2_dp = pos.state()->dirtyPiece; - if (prev2_dp.dirty_num == 0) { - collect_for_one(prev_dp); - } else { - collect_for_two(prev_dp, prev2_dp); - } - } - } - } }; // Class template that represents the feature set @@ -146,30 +62,6 @@ namespace Eval::NNUE::Features { CompileTimeList; static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues; - private: - // Get a list of indices for active features - static void CollectActiveIndices( - const Position& pos, const TriggerEvent trigger, const Color perspective, - IndexList* const active) { - if (FeatureType::kRefreshTrigger == trigger) { - FeatureType::AppendActiveIndices(pos, perspective, active); - } - } - - // Get a list of indices for recently changed features - static void CollectChangedIndices( - const Position& pos, const DirtyPiece& dp, const TriggerEvent trigger, const Color perspective, - IndexList* const removed, IndexList* const added) { - - if (FeatureType::kRefreshTrigger == trigger) { - FeatureType::AppendChangedIndices(pos, dp, perspective, removed, added); - } - } - - // Make the base class and the class template that recursively uses itself a friend - friend class FeatureSetBase; - template - friend class FeatureSet; }; } // namespace Eval::NNUE::Features diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 26370710..a357d835 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -25,11 +25,14 @@ namespace Eval::NNUE { + // The accumulator of a StateInfo without parent is set to the INIT state + enum AccumulatorState { EMPTY, COMPUTED, INIT }; + // Class that holds the result of affine transformation of input features struct alignas(kCacheLineSize) Accumulator { std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - bool computed_accumulation; + AccumulatorState state[2]; }; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 2f86d20a..f145c848 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -32,7 +32,7 @@ namespace Eval::NNUE { // If vector instructions are enabled, we update and refresh the // accumulator tile by tile such that each tile fits in the CPU's // vector registers. - #define TILING + #define VECTOR #ifdef USE_AVX512 typedef __m512i vec_t; @@ -75,7 +75,7 @@ namespace Eval::NNUE { static constexpr IndexType kNumRegs = 16; #else - #undef TILING + #undef VECTOR #endif @@ -86,7 +86,7 @@ namespace Eval::NNUE { // Number of output dimensions for one side static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; - #ifdef TILING + #ifdef VECTOR static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); #endif @@ -119,32 +119,11 @@ namespace Eval::NNUE { return !stream.fail(); } - // Proceed with the difference calculation if possible - bool UpdateAccumulatorIfPossible(const Position& pos) const { - - const auto now = pos.state(); - if (now->accumulator.computed_accumulation) - return true; - - const auto prev = now->previous; - if (prev) { - if (prev->accumulator.computed_accumulation) { - UpdateAccumulator(pos); - return true; - } else if (prev->previous && prev->previous->accumulator.computed_accumulation) { - UpdateAccumulator(pos); - return true; - } - } - - return false; - } - // Convert input features void Transform(const Position& pos, OutputType* output) const { - if (!UpdateAccumulatorIfPossible(pos)) - RefreshAccumulator(pos); + UpdateAccumulator(pos, WHITE); + UpdateAccumulator(pos, BLACK); const auto& accumulation = pos.state()->accumulator.accumulation; @@ -240,27 +219,142 @@ namespace Eval::NNUE { } private: - // Calculate cumulative value without using difference calculation - void RefreshAccumulator(const Position& pos) const { + void UpdateAccumulator(const Position& pos, const Color c) const { - auto& accumulator = pos.state()->accumulator; - IndexType i = 0; - Features::IndexList active_indices[2]; - RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], - active_indices); - for (Color perspective : { WHITE, BLACK }) { - #ifdef TILING - for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) { + #ifdef VECTOR + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[kNumRegs]; + #endif + + // Look for a usable accumulator of an earlier position. We keep track + // of the estimated gain in terms of features to be added/subtracted. + StateInfo *st = pos.state(), *next = nullptr; + int gain = popcount(pos.pieces()) - 2; + while (st->accumulator.state[c] == EMPTY) + { + auto& dp = st->dirtyPiece; + // The first condition tests whether an incremental update is + // possible at all: if this side's king has moved, it is not possible. + static_assert(std::is_same_v>, + "Current code assumes that only kFriendlyKingMoved refresh trigger is being used."); + if ( dp.piece[0] == make_piece(c, KING) + || (gain -= dp.dirty_num + 1) < 0) + break; + next = st; + st = st->previous; + } + + if (st->accumulator.state[c] == COMPUTED) + { + if (next == nullptr) + return; + + // Update incrementally in two steps. First, we update the "next" + // accumulator. Then, we update the current accumulator (pos.state()). + + // Gather all features to be updated. This code assumes HalfKP features + // only and doesn't support refresh triggers. + static_assert(std::is_same_v>, + RawFeatures>); + Features::IndexList removed[2], added[2]; + Features::HalfKP::AppendChangedIndices(pos, + next->dirtyPiece, c, &removed[0], &added[0]); + for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous) + Features::HalfKP::AppendChangedIndices(pos, + st2->dirtyPiece, c, &removed[1], &added[1]); + + // Mark the accumulators as computed. + next->accumulator.state[c] = COMPUTED; + pos.state()->accumulator.state[c] = COMPUTED; + + // Now update the accumulators listed in info[], where the last element is a sentinel. + StateInfo *info[3] = + { next, next == pos.state() ? nullptr : pos.state(), nullptr }; + #ifdef VECTOR + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) + { + // Load accumulator + auto accTile = reinterpret_cast( + &st->accumulator.accumulation[c][0][j * kTileHeight]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_load(&accTile[k]); + + for (IndexType i = 0; info[i]; ++i) + { + // Difference calculation for the deactivated features + for (const auto index : removed[i]) + { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + // Store accumulator + accTile = reinterpret_cast( + &info[i]->accumulator.accumulation[c][0][j * kTileHeight]); + for (IndexType k = 0; k < kNumRegs; ++k) + vec_store(&accTile[k], acc[k]); + } + } + + #else + for (IndexType i = 0; info[i]; ++i) + { + std::memcpy(info[i]->accumulator.accumulation[c][0], + st->accumulator.accumulation[c][0], + kHalfDimensions * sizeof(BiasType)); + st = info[i]; + + // Difference calculation for the deactivated features + for (const auto index : removed[i]) + { + const IndexType offset = kHalfDimensions * index; + + for (IndexType j = 0; j < kHalfDimensions; ++j) + st->accumulator.accumulation[c][0][j] -= weights_[offset + j]; + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = kHalfDimensions * index; + + for (IndexType j = 0; j < kHalfDimensions; ++j) + st->accumulator.accumulation[c][0][j] += weights_[offset + j]; + } + } + #endif + } + else + { + // Refresh the accumulator + auto& accumulator = pos.state()->accumulator; + accumulator.state[c] = COMPUTED; + Features::IndexList active; + Features::HalfKP::AppendActiveIndices(pos, c, &active); + + #ifdef VECTOR + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) + { auto biasesTile = reinterpret_cast( &biases_[j * kTileHeight]); - auto accTile = reinterpret_cast( - &accumulator.accumulation[perspective][i][j * kTileHeight]); - vec_t acc[kNumRegs]; - - for (unsigned k = 0; k < kNumRegs; ++k) + for (IndexType k = 0; k < kNumRegs; ++k) acc[k] = biasesTile[k]; - for (const auto index : active_indices[perspective]) { + for (const auto index : active) + { const IndexType offset = kHalfDimensions * index + j * kTileHeight; auto column = reinterpret_cast(&weights_[offset]); @@ -268,18 +362,22 @@ namespace Eval::NNUE { acc[k] = vec_add_16(acc[k], column[k]); } + auto accTile = reinterpret_cast( + &accumulator.accumulation[c][0][j * kTileHeight]); for (unsigned k = 0; k < kNumRegs; k++) vec_store(&accTile[k], acc[k]); } + #else - std::memcpy(accumulator.accumulation[perspective][i], biases_, + std::memcpy(accumulator.accumulation[c][0], biases_, kHalfDimensions * sizeof(BiasType)); - for (const auto index : active_indices[perspective]) { + for (const auto index : active) + { const IndexType offset = kHalfDimensions * index; for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] += weights_[offset + j]; + accumulator.accumulation[c][0][j] += weights_[offset + j]; } #endif } @@ -287,106 +385,6 @@ namespace Eval::NNUE { #if defined(USE_MMX) _mm_empty(); #endif - - accumulator.computed_accumulation = true; - } - - // Calculate cumulative value using difference calculation - void UpdateAccumulator(const Position& pos) const { - - Accumulator* prev_accumulator; - assert(pos.state()->previous); - if (pos.state()->previous->accumulator.computed_accumulation) { - prev_accumulator = &pos.state()->previous->accumulator; - } - else { - assert(pos.state()->previous->previous); - assert(pos.state()->previous->previous->accumulator.computed_accumulation); - prev_accumulator = &pos.state()->previous->previous->accumulator; - } - - auto& accumulator = pos.state()->accumulator; - IndexType i = 0; - Features::IndexList removed_indices[2], added_indices[2]; - bool reset[2] = { false, false }; - RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], - removed_indices, added_indices, reset); - - #ifdef TILING - for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { - for (Color perspective : { WHITE, BLACK }) { - auto accTile = reinterpret_cast( - &accumulator.accumulation[perspective][i][j * kTileHeight]); - vec_t acc[kNumRegs]; - - if (reset[perspective]) { - auto biasesTile = reinterpret_cast( - &biases_[j * kTileHeight]); - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = biasesTile[k]; - } else { - auto prevAccTile = reinterpret_cast( - &prev_accumulator->accumulation[perspective][i][j * kTileHeight]); - for (IndexType k = 0; k < kNumRegs; ++k) - acc[k] = vec_load(&prevAccTile[k]); - - // Difference calculation for the deactivated features - for (const auto index : removed_indices[perspective]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); - - for (IndexType k = 0; k < kNumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); - } - } - { // Difference calculation for the activated features - for (const auto index : added_indices[perspective]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); - - for (IndexType k = 0; k < kNumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - } - - for (IndexType k = 0; k < kNumRegs; ++k) - vec_store(&accTile[k], acc[k]); - } - } - #if defined(USE_MMX) - _mm_empty(); - #endif - - #else - for (Color perspective : { WHITE, BLACK }) { - - if (reset[perspective]) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memcpy(accumulator.accumulation[perspective][i], - prev_accumulator->accumulation[perspective][i], - kHalfDimensions * sizeof(BiasType)); - // Difference calculation for the deactivated features - for (const auto index : removed_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; - } - } - { // Difference calculation for the activated features - for (const auto index : added_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - } - } - } - #endif - - accumulator.computed_accumulation = true; } using BiasType = std::int16_t; diff --git a/src/position.cpp b/src/position.cpp index e6a760d2..b707293d 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -279,6 +279,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th chess960 = isChess960; thisThread = th; set_state(st); + st->accumulator.state[WHITE] = Eval::NNUE::INIT; + st->accumulator.state[BLACK] = Eval::NNUE::INIT; assert(pos_is_ok()); @@ -703,7 +705,8 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { ++st->pliesFromNull; // Used by NNUE - st->accumulator.computed_accumulation = false; + st->accumulator.state[WHITE] = Eval::NNUE::EMPTY; + st->accumulator.state[BLACK] = Eval::NNUE::EMPTY; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -996,16 +999,16 @@ void Position::do_null_move(StateInfo& newSt) { assert(!checkers()); assert(&newSt != st); - if (Eval::useNNUE) - { - std::memcpy(&newSt, st, sizeof(StateInfo)); - } - else - std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); + std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); newSt.previous = st; st = &newSt; + st->dirtyPiece.dirty_num = 0; + st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator() + st->accumulator.state[WHITE] = Eval::NNUE::EMPTY; + st->accumulator.state[BLACK] = Eval::NNUE::EMPTY; + if (st->epSquare != SQ_NONE) { st->key ^= Zobrist::enpassant[file_of(st->epSquare)];