diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 345925f6..fe6b83aa 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -60,8 +60,9 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, int nnueComplexity; int v; - Value nnue = smallNet ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity, psqtOnly) - : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false); + Value nnue = smallNet + ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity, psqtOnly) + : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false); const auto adjustEval = [&](int optDiv, int nnueDiv, int npmDiv, int pawnCountConstant, int pawnCountMul, int npmConstant, int evalDiv, diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index dd313958..a2b3b989 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -102,7 +102,7 @@ struct AccumulatorCaches { small.clear(networks.small); } - Cache big; + Cache big; Cache small; }; diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 60957ebe..6b3f78a9 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -660,8 +660,8 @@ class FeatureTransformer { bool psqtOnly) const { assert(cache != nullptr); - Square ksq = pos.square(Perspective); - auto& entry = (*cache)[ksq]; + Square ksq = pos.square(Perspective); + auto& entry = (*cache)[ksq]; FeatureSet::IndexList removed, added; if (entry.psqtOnly && !psqtOnly) @@ -712,16 +712,20 @@ class FeatureTransformer { for (IndexType k = 0; k < NumRegs; ++k) acc[k] = entryTile[k]; - for (int i = 0; i < int(added.size()); ++i) + int i0 = 0; + for (; i0 < int(std::min(removed.size(), added.size())); ++i0) { - IndexType index = added[i]; - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); + IndexType indexR = removed[i0]; + const IndexType offsetR = HalfDimensions * indexR + j * TileHeight; + auto columnR = reinterpret_cast(&weights[offsetR]); + IndexType indexA = added[i0]; + const IndexType offsetA = HalfDimensions * indexA + j * TileHeight; + auto columnA = reinterpret_cast(&weights[offsetA]); for (unsigned k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); + acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]), columnA[k]); } - for (int i = 0; i < int(removed.size()); ++i) + for (int i = i0; i < int(removed.size()); ++i) { IndexType index = removed[i]; const IndexType offset = HalfDimensions * index + j * TileHeight; @@ -730,6 +734,15 @@ class FeatureTransformer { for (unsigned k = 0; k < NumRegs; ++k) acc[k] = vec_sub_16(acc[k], column[k]); } + for (int i = i0; i < int(added.size()); ++i) + { + IndexType index = added[i]; + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + + for (unsigned k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } for (IndexType k = 0; k < NumRegs; k++) vec_store(&entryTile[k], acc[k]); @@ -742,15 +755,6 @@ class FeatureTransformer { for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = entryTilePsqt[k]; - for (int i = 0; i < int(added.size()); ++i) - { - IndexType index = added[i]; - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } for (int i = 0; i < int(removed.size()); ++i) { IndexType index = removed[i]; @@ -760,6 +764,15 @@ class FeatureTransformer { for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); } + for (int i = 0; i < int(added.size()); ++i) + { + IndexType index = added[i]; + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } for (std::size_t k = 0; k < NumPsqtRegs; ++k) vec_store_psqt(&entryTilePsqt[k], psqt[k]); @@ -767,18 +780,6 @@ class FeatureTransformer { #else - for (const auto index : added) - { - if (!psqtOnly) - { - const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < HalfDimensions; ++j) - entry.accumulation[Perspective][j] += weights[offset + j]; - } - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; - } for (const auto index : removed) { if (!psqtOnly) @@ -791,6 +792,18 @@ class FeatureTransformer { for (std::size_t k = 0; k < PSQTBuckets; ++k) entry.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; } + for (const auto index : added) + { + if (!psqtOnly) + { + const IndexType offset = HalfDimensions * index; + for (IndexType j = 0; j < HalfDimensions; ++j) + entry.accumulation[Perspective][j] += weights[offset + j]; + } + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; + } #endif diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp index e92dcc71..21685d0f 100644 --- a/src/nnue/nnue_misc.cpp +++ b/src/nnue/nnue_misc.cpp @@ -48,7 +48,8 @@ void hint_common_parent_position(const Position& pos, int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move())); if (simpleEvalAbs > Eval::SmallNetThreshold) - networks.small.hint_common_access(pos, &caches.small, simpleEvalAbs > Eval::PsqtOnlyThreshold); + networks.small.hint_common_access(pos, &caches.small, + simpleEvalAbs > Eval::PsqtOnlyThreshold); else networks.big.hint_common_access(pos, &caches.big, false); }