diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h index bd960929..87f21fff 100644 --- a/src/nnue/features/feature_set.h +++ b/src/nnue/features/feature_set.h @@ -21,48 +21,6 @@ namespace Eval::NNUE::Features { kValues = {{First, Remaining...}}; }; - template - constexpr std::array - CompileTimeList::kValues; - - template - struct CompileTimeList { - static constexpr bool Contains(T /*value*/) { - return false; - } - static constexpr std::array kValues = {{}}; - }; - - // Class template that adds to the beginning of the list - template - struct AppendToList; - - template - struct AppendToList, AnotherValue> { - using Result = CompileTimeList; - }; - - // Class template for adding to a sorted, unique list - template - struct InsertToSet; - - template - struct InsertToSet, AnotherValue> { - using Result = std::conditional_t< - CompileTimeList::Contains(AnotherValue), - CompileTimeList, - std::conditional_t<(AnotherValue , - typename AppendToList, AnotherValue>::Result, - First>::Result>>; - }; - - template - struct InsertToSet, Value> { - using Result = CompileTimeList; - }; - // Base class of feature set template class FeatureSetBase { @@ -91,22 +49,10 @@ namespace Eval::NNUE::Features { for (Color perspective : { WHITE, BLACK }) { reset[perspective] = false; switch (trigger) { - case TriggerEvent::kNone: - break; case TriggerEvent::kFriendKingMoved: reset[perspective] = dp.pieceId[0] == PIECE_ID_KING + perspective; break; - case TriggerEvent::kEnemyKingMoved: - reset[perspective] = - dp.pieceId[0] == PIECE_ID_KING + ~perspective; - break; - case TriggerEvent::kAnyKingMoved: - reset[perspective] = dp.pieceId[0] >= PIECE_ID_KING; - break; - case TriggerEvent::kAnyPieceMoved: - reset[perspective] = true; - break; default: assert(false); break; @@ -123,80 +69,6 @@ namespace Eval::NNUE::Features { } }; - // Class template that represents the feature set - // do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime - template - class FeatureSet : - public FeatureSetBase< - FeatureSet> { - - private: - using Head = FirstFeatureType; - using Tail = FeatureSet; - - public: - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = - Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31); - // number of feature dimensions - static constexpr IndexType kDimensions = - Head::kDimensions + Tail::kDimensions; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = - Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions; - // List of timings to perform all calculations instead of difference calculation - using SortedTriggerSet = typename InsertToSet::Result; - static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues; - - // Get the feature quantity name - static std::string GetName() { - return std::string(Head::kName) + "+" + Tail::GetName(); - } - - private: - // Get a list of indices with a value of 1 among the features - template - static void CollectActiveIndices( - const Position& pos, const TriggerEvent trigger, const Color perspective, - IndexListType* const active) { - - Tail::CollectActiveIndices(pos, trigger, perspective, active); - if (Head::kRefreshTrigger == trigger) { - const auto start = active->size(); - Head::AppendActiveIndices(pos, perspective, active); - for (auto i = start; i < active->size(); ++i) { - (*active)[i] += Tail::kDimensions; - } - } - } - - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - template - static void CollectChangedIndices( - const Position& pos, const TriggerEvent trigger, const Color perspective, - IndexListType* const removed, IndexListType* const added) { - - Tail::CollectChangedIndices(pos, trigger, perspective, removed, added); - if (Head::kRefreshTrigger == trigger) { - const auto start_removed = removed->size(); - const auto start_added = added->size(); - Head::AppendChangedIndices(pos, perspective, removed, added); - for (auto i = start_removed; i < removed->size(); ++i) { - (*removed)[i] += Tail::kDimensions; - } - for (auto i = start_added; i < added->size(); ++i) { - (*added)[i] += Tail::kDimensions; - } - } - } - - // Make the base class and the class template that recursively uses itself a friend - friend class FeatureSetBase; - template - friend class FeatureSet; - }; - // Class template that represents the feature set // Specialization with one template argument template diff --git a/src/nnue/features/features_common.h b/src/nnue/features/features_common.h index 8d2c54e0..09587dd5 100644 --- a/src/nnue/features/features_common.h +++ b/src/nnue/features/features_common.h @@ -17,19 +17,12 @@ namespace Eval::NNUE::Features { // Type of timing to perform all calculations instead of difference calculation enum class TriggerEvent { - - kNone, // Calculate the difference whenever possible - kFriendKingMoved, // calculate all when own king moves - kEnemyKingMoved, // do all calculations when enemy king moves - kAnyKingMoved, // do all calculations if either king moves - kAnyPieceMoved, // always do all calculations + kFriendKingMoved // calculate all when own king moves }; // turn side or other side enum class Side { - - kFriend, // turn side - kEnemy, // opponent + kFriend // turn side }; } // namespace Eval::NNUE::Features diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp index 382f0ba0..8c6c178f 100644 --- a/src/nnue/features/half_kp.cpp +++ b/src/nnue/features/half_kp.cpp @@ -70,6 +70,5 @@ namespace Eval::NNUE::Features { } template class HalfKP; - template class HalfKP; } // namespace Eval::NNUE::Features diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_kp.h index 0e7670bf..80e494d3 100644 --- a/src/nnue/features/half_kp.h +++ b/src/nnue/features/half_kp.h @@ -26,9 +26,7 @@ namespace Eval::NNUE::Features { // The maximum value of the number of indexes whose value is 1 at the same time among the feature values static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING; // Timing of full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = - (AssociatedKing == Side::kFriend) ? - TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved; + static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved; // Get a list of indices with a value of 1 among the features static void AppendActiveIndices(const Position& pos, Color perspective, diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index ceba1993..b3a9dcc1 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -121,12 +121,6 @@ namespace Eval::NNUE { (&reinterpret_cast( accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm256_add_epi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm256_add_epi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } #if defined(__MINGW32__) || defined(__MINGW64__) _mm256_storeu_si256 @@ -145,12 +139,6 @@ namespace Eval::NNUE { accumulation[perspectives[p]][0])[j * 2 + 0]); __m128i sum1 = _mm_load_si128(&reinterpret_cast( accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm_add_epi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm_add_epi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); _mm_store_si128(&out[j], @@ -169,19 +157,12 @@ namespace Eval::NNUE { for (IndexType j = 0; j < kNumChunks; ++j) { int16x8_t sum = reinterpret_cast( accumulation[perspectives[p]][0])[j]; - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum = vaddq_s16(sum, reinterpret_cast( - accumulation[perspectives[p]][i])[j]); - } out[j] = vmax_s8(vqmovn_s16(sum), kZero); } #else for (IndexType j = 0; j < kHalfDimensions; ++j) { BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum += accumulation[static_cast(perspectives[p])][i][j]; - } output[offset + j] = static_cast( std::max(0, std::min(127, sum))); } @@ -194,59 +175,53 @@ namespace Eval::NNUE { // Calculate cumulative value without using difference calculation void RefreshAccumulator(const Position& pos) const { auto& accumulator = pos.state()->accumulator; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList active_indices[2]; - RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], - active_indices); - for (Color perspective : { WHITE, BLACK }) { - if (i == 0) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memset(accumulator.accumulation[perspective][i], 0, - kHalfDimensions * sizeof(BiasType)); - } - for (const auto index : active_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; + IndexType i = 0; + Features::IndexList active_indices[2]; + RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], + active_indices); + for (Color perspective : { WHITE, BLACK }) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; #if defined(USE_AVX2) - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { + auto accumulation = reinterpret_cast<__m256i*>( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j])); + _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j])); #else - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); + accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); #endif - } + } #elif defined(USE_SSE2) - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - } + auto accumulation = reinterpret_cast<__m128i*>( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + } #elif defined(IS_ARM) - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } + auto accumulation = reinterpret_cast( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = vaddq_s16(accumulation[j], column[j]); + } #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - } + for (IndexType j = 0; j < kHalfDimensions; ++j) { + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; + } #endif - } } } @@ -258,101 +233,95 @@ namespace Eval::NNUE { void UpdateAccumulator(const Position& pos) const { const auto prev_accumulator = pos.state()->previous->accumulator; auto& accumulator = pos.state()->accumulator; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList removed_indices[2], added_indices[2]; - bool reset[2]; - RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], - removed_indices, added_indices, reset); - for (Color perspective : { WHITE, BLACK }) { + IndexType i = 0; + Features::IndexList removed_indices[2], added_indices[2]; + bool reset[2]; + RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], + removed_indices, added_indices, reset); + for (Color perspective : { WHITE, BLACK }) { #if defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast<__m256i*>( + &accumulator.accumulation[perspective][i][0]); #elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast<__m128i*>( + &accumulator.accumulation[perspective][i][0]); #elif defined(IS_ARM) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast( + &accumulator.accumulation[perspective][i][0]); #endif - if (reset[perspective]) { - if (i == 0) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memset(accumulator.accumulation[perspective][i], 0, - kHalfDimensions * sizeof(BiasType)); - } - } else {// Difference calculation for the feature amount changed from 1 to 0 - std::memcpy(accumulator.accumulation[perspective][i], - prev_accumulator.accumulation[perspective][i], - kHalfDimensions * sizeof(BiasType)); - for (const auto index : removed_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; + if (reset[perspective]) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else {// Difference calculation for the feature amount changed from 1 to 0 + std::memcpy(accumulator.accumulation[perspective][i], + prev_accumulator.accumulation[perspective][i], + kHalfDimensions * sizeof(BiasType)); + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); - } + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); + } #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); - } + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); + } #elif defined(IS_ARM) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vsubq_s16(accumulation[j], column[j]); - } + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = vsubq_s16(accumulation[j], column[j]); + } #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] -= - weights_[offset + j]; - } + for (IndexType j = 0; j < kHalfDimensions; ++j) { + accumulator.accumulation[perspective][i][j] -= + weights_[offset + j]; + } #endif - } } - {// Difference calculation for features that changed from 0 to 1 - for (const auto index : added_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; + } + {// Difference calculation for features that changed from 0 to 1 + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - } + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); + } #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - } + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + } #elif defined(IS_ARM) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = vaddq_s16(accumulation[j], column[j]); + } #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] += - weights_[offset + j]; - } + for (IndexType j = 0; j < kHalfDimensions; ++j) { + accumulator.accumulation[perspective][i][j] += + weights_[offset + j]; + } #endif - } } } }