mirror of
https://github.com/sockspls/badfish
synced 2025-04-30 08:43:09 +00:00
Cache small net w/ psqtOnly support
Caching the small net in the same way as the big net allows both nets to share the same code path and makes it possible to remove update_accumulator_refresh() completely.

STC: https://tests.stockfishchess.org/tests/view/662bfb5ed46f72253dcfed85
LLR: 2.94 (-2.94,2.94) <-1.75,0.25>
Total: 151712 W: 39252 L: 39158 D: 73302
Ptnml(0-2): 565, 17474, 39683, 17570, 564

Closes https://github.com/official-stockfish/Stockfish/pull/5194

Bench: 1836777
This commit is contained in:
parent
bc45cbc820
commit
940a3a7383
6 changed files with 88 additions and 195 deletions
|
@ -60,7 +60,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks,
|
||||||
int nnueComplexity;
|
int nnueComplexity;
|
||||||
int v;
|
int v;
|
||||||
|
|
||||||
Value nnue = smallNet ? networks.small.evaluate(pos, nullptr, true, &nnueComplexity, psqtOnly)
|
Value nnue = smallNet ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity, psqtOnly)
|
||||||
: networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false);
|
: networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false);
|
||||||
|
|
||||||
const auto adjustEval = [&](int optDiv, int nnueDiv, int npmDiv, int pawnCountConstant,
|
const auto adjustEval = [&](int optDiv, int nnueDiv, int npmDiv, int pawnCountConstant,
|
||||||
|
|
|
@ -263,8 +263,8 @@ void Network<Arch, Transformer>::verify(std::string evalfilePath) const {
|
||||||
template<typename Arch, typename Transformer>
|
template<typename Arch, typename Transformer>
|
||||||
void Network<Arch, Transformer>::hint_common_access(const Position& pos,
|
void Network<Arch, Transformer>::hint_common_access(const Position& pos,
|
||||||
AccumulatorCaches::Cache<FTDimensions>* cache,
|
AccumulatorCaches::Cache<FTDimensions>* cache,
|
||||||
bool psqtOnl) const {
|
bool psqtOnly) const {
|
||||||
featureTransformer->hint_common_access(pos, cache, psqtOnl);
|
featureTransformer->hint_common_access(pos, cache, psqtOnly);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Arch, typename Transformer>
|
template<typename Arch, typename Transformer>
|
||||||
|
|
|
@ -62,7 +62,7 @@ class Network {
|
||||||
|
|
||||||
void hint_common_access(const Position& pos,
|
void hint_common_access(const Position& pos,
|
||||||
AccumulatorCaches::Cache<FTDimensions>* cache,
|
AccumulatorCaches::Cache<FTDimensions>* cache,
|
||||||
bool psqtOnl) const;
|
bool psqtOnly) const;
|
||||||
|
|
||||||
void verify(std::string evalfilePath) const;
|
void verify(std::string evalfilePath) const;
|
||||||
NnueEvalTrace trace_evaluate(const Position& pos,
|
NnueEvalTrace trace_evaluate(const Position& pos,
|
||||||
|
|
|
@ -63,6 +63,7 @@ struct AccumulatorCaches {
|
||||||
PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets];
|
PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets];
|
||||||
Bitboard byColorBB[COLOR_NB][COLOR_NB];
|
Bitboard byColorBB[COLOR_NB][COLOR_NB];
|
||||||
Bitboard byTypeBB[COLOR_NB][PIECE_TYPE_NB];
|
Bitboard byTypeBB[COLOR_NB][PIECE_TYPE_NB];
|
||||||
|
bool psqtOnly;
|
||||||
|
|
||||||
// To initialize a refresh entry, we set all its bitboards empty,
|
// To initialize a refresh entry, we set all its bitboards empty,
|
||||||
// so we put the biases in the accumulation, without any weights on top
|
// so we put the biases in the accumulation, without any weights on top
|
||||||
|
@ -70,6 +71,7 @@ struct AccumulatorCaches {
|
||||||
|
|
||||||
std::memset(byColorBB, 0, sizeof(byColorBB));
|
std::memset(byColorBB, 0, sizeof(byColorBB));
|
||||||
std::memset(byTypeBB, 0, sizeof(byTypeBB));
|
std::memset(byTypeBB, 0, sizeof(byTypeBB));
|
||||||
|
psqtOnly = false;
|
||||||
|
|
||||||
std::memcpy(accumulation[WHITE], biases, Size * sizeof(BiasType));
|
std::memcpy(accumulation[WHITE], biases, Size * sizeof(BiasType));
|
||||||
std::memcpy(accumulation[BLACK], biases, Size * sizeof(BiasType));
|
std::memcpy(accumulation[BLACK], biases, Size * sizeof(BiasType));
|
||||||
|
@ -97,11 +99,11 @@ struct AccumulatorCaches {
|
||||||
template<typename Networks>
|
template<typename Networks>
|
||||||
void clear(const Networks& networks) {
|
void clear(const Networks& networks) {
|
||||||
big.clear(networks.big);
|
big.clear(networks.big);
|
||||||
|
small.clear(networks.small);
|
||||||
}
|
}
|
||||||
|
|
||||||
// When adding a new cache for a network, i.e. the smallnet
|
|
||||||
// the appropriate condition must be added to FeatureTransformer::update_accumulator_refresh.
|
|
||||||
Cache<TransformedFeatureDimensionsBig> big;
|
Cache<TransformedFeatureDimensionsBig> big;
|
||||||
|
Cache<TransformedFeatureDimensionsSmall> small;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Stockfish::Eval::NNUE
|
} // namespace Stockfish::Eval::NNUE
|
||||||
|
|
|
@ -656,18 +656,21 @@ class FeatureTransformer {
|
||||||
|
|
||||||
template<Color Perspective>
|
template<Color Perspective>
|
||||||
void update_accumulator_refresh_cache(const Position& pos,
|
void update_accumulator_refresh_cache(const Position& pos,
|
||||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||||
|
bool psqtOnly) const {
|
||||||
assert(cache != nullptr);
|
assert(cache != nullptr);
|
||||||
|
|
||||||
Square ksq = pos.square<KING>(Perspective);
|
Square ksq = pos.square<KING>(Perspective);
|
||||||
|
|
||||||
auto& entry = (*cache)[ksq];
|
auto& entry = (*cache)[ksq];
|
||||||
|
|
||||||
auto& accumulator = pos.state()->*accPtr;
|
|
||||||
accumulator.computed[Perspective] = true;
|
|
||||||
accumulator.computedPSQT[Perspective] = true;
|
|
||||||
|
|
||||||
FeatureSet::IndexList removed, added;
|
FeatureSet::IndexList removed, added;
|
||||||
|
|
||||||
|
if (entry.psqtOnly && !psqtOnly)
|
||||||
|
{
|
||||||
|
entry.clear(biases);
|
||||||
|
FeatureSet::append_active_indices<Perspective>(pos, added);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
for (Color c : {WHITE, BLACK})
|
for (Color c : {WHITE, BLACK})
|
||||||
{
|
{
|
||||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||||
|
@ -691,11 +694,17 @@ class FeatureTransformer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto& accumulator = pos.state()->*accPtr;
|
||||||
|
accumulator.computed[Perspective] = !psqtOnly;
|
||||||
|
accumulator.computedPSQT[Perspective] = true;
|
||||||
|
|
||||||
#ifdef VECTOR
|
#ifdef VECTOR
|
||||||
vec_t acc[NumRegs];
|
vec_t acc[NumRegs];
|
||||||
psqt_vec_t psqt[NumPsqtRegs];
|
psqt_vec_t psqt[NumPsqtRegs];
|
||||||
|
|
||||||
|
if (!psqtOnly)
|
||||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||||
{
|
{
|
||||||
auto entryTile =
|
auto entryTile =
|
||||||
|
@ -759,19 +768,25 @@ class FeatureTransformer {
|
||||||
#else
|
#else
|
||||||
|
|
||||||
for (const auto index : added)
|
for (const auto index : added)
|
||||||
|
{
|
||||||
|
if (!psqtOnly)
|
||||||
{
|
{
|
||||||
const IndexType offset = HalfDimensions * index;
|
const IndexType offset = HalfDimensions * index;
|
||||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||||
entry.accumulation[Perspective][j] += weights[offset + j];
|
entry.accumulation[Perspective][j] += weights[offset + j];
|
||||||
|
}
|
||||||
|
|
||||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||||
entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
|
entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
|
||||||
}
|
}
|
||||||
for (const auto index : removed)
|
for (const auto index : removed)
|
||||||
|
{
|
||||||
|
if (!psqtOnly)
|
||||||
{
|
{
|
||||||
const IndexType offset = HalfDimensions * index;
|
const IndexType offset = HalfDimensions * index;
|
||||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||||
entry.accumulation[Perspective][j] -= weights[offset + j];
|
entry.accumulation[Perspective][j] -= weights[offset + j];
|
||||||
|
}
|
||||||
|
|
||||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||||
entry.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
|
entry.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
|
||||||
|
@ -782,144 +797,20 @@ class FeatureTransformer {
|
||||||
// The accumulator of the refresh entry has been updated.
|
// The accumulator of the refresh entry has been updated.
|
||||||
// Now copy its content to the actual accumulator we were refreshing
|
// Now copy its content to the actual accumulator we were refreshing
|
||||||
|
|
||||||
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation[Perspective],
|
if (!psqtOnly)
|
||||||
sizeof(int32_t) * PSQTBuckets);
|
|
||||||
|
|
||||||
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation[Perspective],
|
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation[Perspective],
|
||||||
sizeof(BiasType) * HalfDimensions);
|
sizeof(BiasType) * HalfDimensions);
|
||||||
|
|
||||||
|
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation[Perspective],
|
||||||
|
sizeof(int32_t) * PSQTBuckets);
|
||||||
|
|
||||||
for (Color c : {WHITE, BLACK})
|
for (Color c : {WHITE, BLACK})
|
||||||
entry.byColorBB[Perspective][c] = pos.pieces(c);
|
entry.byColorBB[Perspective][c] = pos.pieces(c);
|
||||||
|
|
||||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||||
entry.byTypeBB[Perspective][pt] = pos.pieces(pt);
|
entry.byTypeBB[Perspective][pt] = pos.pieces(pt);
|
||||||
}
|
|
||||||
|
|
||||||
template<Color Perspective>
|
entry.psqtOnly = psqtOnly;
|
||||||
void
|
|
||||||
update_accumulator_refresh(const Position& pos,
|
|
||||||
[[maybe_unused]] AccumulatorCaches::Cache<HalfDimensions>* cache,
|
|
||||||
bool psqtOnly) const {
|
|
||||||
|
|
||||||
// When we are refreshing the accumulator of the big net,
|
|
||||||
// redirect to the version of refresh that uses the refresh table.
|
|
||||||
// Using the cache for the small net is not beneficial.
|
|
||||||
if constexpr (HalfDimensions == Eval::NNUE::TransformedFeatureDimensionsBig)
|
|
||||||
{
|
|
||||||
update_accumulator_refresh_cache<Perspective>(pos, cache);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef VECTOR
|
|
||||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
|
||||||
// is defined in the VECTOR code below, once in each branch
|
|
||||||
vec_t acc[NumRegs];
|
|
||||||
psqt_vec_t psqt[NumPsqtRegs];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Refresh the accumulator
|
|
||||||
// Could be extracted to a separate function because it's done in 2 places,
|
|
||||||
// but it's unclear if compilers would correctly handle register allocation.
|
|
||||||
auto& accumulator = pos.state()->*accPtr;
|
|
||||||
accumulator.computed[Perspective] = !psqtOnly;
|
|
||||||
accumulator.computedPSQT[Perspective] = true;
|
|
||||||
FeatureSet::IndexList active;
|
|
||||||
FeatureSet::append_active_indices<Perspective>(pos, active);
|
|
||||||
|
|
||||||
#ifdef VECTOR
|
|
||||||
if (!psqtOnly)
|
|
||||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
|
||||||
{
|
|
||||||
auto biasesTile = reinterpret_cast<const vec_t*>(&biases[j * TileHeight]);
|
|
||||||
for (IndexType k = 0; k < NumRegs; ++k)
|
|
||||||
acc[k] = biasesTile[k];
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
for (; i < int(active.size()) - 1; i += 2)
|
|
||||||
{
|
|
||||||
IndexType index0 = active[i];
|
|
||||||
IndexType index1 = active[i + 1];
|
|
||||||
const IndexType offset0 = HalfDimensions * index0 + j * TileHeight;
|
|
||||||
const IndexType offset1 = HalfDimensions * index1 + j * TileHeight;
|
|
||||||
auto column0 = reinterpret_cast<const vec_t*>(&weights[offset0]);
|
|
||||||
auto column1 = reinterpret_cast<const vec_t*>(&weights[offset1]);
|
|
||||||
|
|
||||||
for (unsigned k = 0; k < NumRegs; ++k)
|
|
||||||
acc[k] = vec_add_16(acc[k], vec_add_16(column0[k], column1[k]));
|
|
||||||
}
|
|
||||||
for (; i < int(active.size()); ++i)
|
|
||||||
{
|
|
||||||
IndexType index = active[i];
|
|
||||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
|
||||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
|
||||||
|
|
||||||
for (unsigned k = 0; k < NumRegs; ++k)
|
|
||||||
acc[k] = vec_add_16(acc[k], column[k]);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto accTile =
|
|
||||||
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
|
|
||||||
for (unsigned k = 0; k < NumRegs; k++)
|
|
||||||
vec_store(&accTile[k], acc[k]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
|
|
||||||
{
|
|
||||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
|
||||||
psqt[k] = vec_zero_psqt();
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
for (; i < int(active.size()) - 1; i += 2)
|
|
||||||
{
|
|
||||||
IndexType index0 = active[i];
|
|
||||||
IndexType index1 = active[i + 1];
|
|
||||||
const IndexType offset0 = PSQTBuckets * index0 + j * PsqtTileHeight;
|
|
||||||
const IndexType offset1 = PSQTBuckets * index1 + j * PsqtTileHeight;
|
|
||||||
auto columnPsqt0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset0]);
|
|
||||||
auto columnPsqt1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset1]);
|
|
||||||
|
|
||||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
|
||||||
psqt[k] =
|
|
||||||
vec_add_psqt_32(psqt[k], vec_add_psqt_32(columnPsqt0[k], columnPsqt1[k]));
|
|
||||||
}
|
|
||||||
for (; i < int(active.size()); ++i)
|
|
||||||
{
|
|
||||||
IndexType index = active[i];
|
|
||||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
|
||||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
|
||||||
|
|
||||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
|
||||||
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
|
||||||
&accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
|
|
||||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
|
||||||
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
if (!psqtOnly)
|
|
||||||
std::memcpy(accumulator.accumulation[Perspective], biases,
|
|
||||||
HalfDimensions * sizeof(BiasType));
|
|
||||||
|
|
||||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
|
||||||
accumulator.psqtAccumulation[Perspective][k] = 0;
|
|
||||||
|
|
||||||
for (const auto index : active)
|
|
||||||
{
|
|
||||||
if (!psqtOnly)
|
|
||||||
{
|
|
||||||
const IndexType offset = HalfDimensions * index;
|
|
||||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
|
||||||
accumulator.accumulation[Perspective][j] += weights[offset + j];
|
|
||||||
}
|
|
||||||
|
|
||||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
|
||||||
accumulator.psqtAccumulation[Perspective][k] +=
|
|
||||||
psqtWeights[index * PSQTBuckets + k];
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<Color Perspective>
|
template<Color Perspective>
|
||||||
|
@ -948,7 +839,7 @@ class FeatureTransformer {
|
||||||
psqtOnly);
|
psqtOnly);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
update_accumulator_refresh<Perspective>(pos, cache, psqtOnly);
|
update_accumulator_refresh_cache<Perspective>(pos, cache, psqtOnly);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<Color Perspective>
|
template<Color Perspective>
|
||||||
|
@ -976,7 +867,7 @@ class FeatureTransformer {
|
||||||
psqtOnly);
|
psqtOnly);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
update_accumulator_refresh<Perspective>(pos, cache, psqtOnly);
|
update_accumulator_refresh_cache<Perspective>(pos, cache, psqtOnly);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<IndexType Size>
|
template<IndexType Size>
|
||||||
|
|
|
@ -48,7 +48,7 @@ void hint_common_parent_position(const Position& pos,
|
||||||
|
|
||||||
int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move()));
|
int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move()));
|
||||||
if (simpleEvalAbs > Eval::SmallNetThreshold)
|
if (simpleEvalAbs > Eval::SmallNetThreshold)
|
||||||
networks.small.hint_common_access(pos, nullptr, simpleEvalAbs > Eval::PsqtOnlyThreshold);
|
networks.small.hint_common_access(pos, &caches.small, simpleEvalAbs > Eval::PsqtOnlyThreshold);
|
||||||
else
|
else
|
||||||
networks.big.hint_common_access(pos, &caches.big, false);
|
networks.big.hint_common_access(pos, &caches.big, false);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue