mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
Cache small net w/ psqtOnly support
Caching the small net in the same way as the big net allows them to share the same code path and completely removes update_accumulator_refresh().

STC: https://tests.stockfishchess.org/tests/view/662bfb5ed46f72253dcfed85
LLR: 2.94 (-2.94,2.94) <-1.75,0.25>
Total: 151712 W: 39252 L: 39158 D: 73302
Ptnml(0-2): 565, 17474, 39683, 17570, 564

closes https://github.com/official-stockfish/Stockfish/pull/5194

Bench: 1836777
This commit is contained in:
parent
bc45cbc820
commit
940a3a7383
6 changed files with 88 additions and 195 deletions
|
@ -60,7 +60,7 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks,
|
|||
int nnueComplexity;
|
||||
int v;
|
||||
|
||||
Value nnue = smallNet ? networks.small.evaluate(pos, nullptr, true, &nnueComplexity, psqtOnly)
|
||||
Value nnue = smallNet ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity, psqtOnly)
|
||||
: networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false);
|
||||
|
||||
const auto adjustEval = [&](int optDiv, int nnueDiv, int npmDiv, int pawnCountConstant,
|
||||
|
|
|
@ -263,8 +263,8 @@ void Network<Arch, Transformer>::verify(std::string evalfilePath) const {
|
|||
template<typename Arch, typename Transformer>
|
||||
void Network<Arch, Transformer>::hint_common_access(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache,
|
||||
bool psqtOnl) const {
|
||||
featureTransformer->hint_common_access(pos, cache, psqtOnl);
|
||||
bool psqtOnly) const {
|
||||
featureTransformer->hint_common_access(pos, cache, psqtOnly);
|
||||
}
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
|
|
|
@ -62,7 +62,7 @@ class Network {
|
|||
|
||||
void hint_common_access(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache,
|
||||
bool psqtOnl) const;
|
||||
bool psqtOnly) const;
|
||||
|
||||
void verify(std::string evalfilePath) const;
|
||||
NnueEvalTrace trace_evaluate(const Position& pos,
|
||||
|
|
|
@ -63,6 +63,7 @@ struct AccumulatorCaches {
|
|||
PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets];
|
||||
Bitboard byColorBB[COLOR_NB][COLOR_NB];
|
||||
Bitboard byTypeBB[COLOR_NB][PIECE_TYPE_NB];
|
||||
bool psqtOnly;
|
||||
|
||||
// To initialize a refresh entry, we set all its bitboards empty,
|
||||
// so we put the biases in the accumulation, without any weights on top
|
||||
|
@ -70,6 +71,7 @@ struct AccumulatorCaches {
|
|||
|
||||
std::memset(byColorBB, 0, sizeof(byColorBB));
|
||||
std::memset(byTypeBB, 0, sizeof(byTypeBB));
|
||||
psqtOnly = false;
|
||||
|
||||
std::memcpy(accumulation[WHITE], biases, Size * sizeof(BiasType));
|
||||
std::memcpy(accumulation[BLACK], biases, Size * sizeof(BiasType));
|
||||
|
@ -97,11 +99,11 @@ struct AccumulatorCaches {
|
|||
template<typename Networks>
|
||||
void clear(const Networks& networks) {
|
||||
big.clear(networks.big);
|
||||
small.clear(networks.small);
|
||||
}
|
||||
|
||||
// When adding a new cache for a network, i.e. the smallnet
|
||||
// the appropriate condition must be added to FeatureTransformer::update_accumulator_refresh.
|
||||
Cache<TransformedFeatureDimensionsBig> big;
|
||||
Cache<TransformedFeatureDimensionsSmall> small;
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
|
|
@ -656,75 +656,84 @@ class FeatureTransformer {
|
|||
|
||||
template<Color Perspective>
|
||||
void update_accumulator_refresh_cache(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||
bool psqtOnly) const {
|
||||
assert(cache != nullptr);
|
||||
|
||||
Square ksq = pos.square<KING>(Perspective);
|
||||
|
||||
auto& entry = (*cache)[ksq];
|
||||
|
||||
auto& accumulator = pos.state()->*accPtr;
|
||||
accumulator.computed[Perspective] = true;
|
||||
accumulator.computedPSQT[Perspective] = true;
|
||||
|
||||
FeatureSet::IndexList removed, added;
|
||||
for (Color c : {WHITE, BLACK})
|
||||
{
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
{
|
||||
const Piece piece = make_piece(c, pt);
|
||||
const Bitboard oldBB =
|
||||
entry.byColorBB[Perspective][c] & entry.byTypeBB[Perspective][pt];
|
||||
const Bitboard newBB = pos.pieces(c, pt);
|
||||
Bitboard toRemove = oldBB & ~newBB;
|
||||
Bitboard toAdd = newBB & ~oldBB;
|
||||
|
||||
while (toRemove)
|
||||
if (entry.psqtOnly && !psqtOnly)
|
||||
{
|
||||
entry.clear(biases);
|
||||
FeatureSet::append_active_indices<Perspective>(pos, added);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (Color c : {WHITE, BLACK})
|
||||
{
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
{
|
||||
Square sq = pop_lsb(toRemove);
|
||||
removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
while (toAdd)
|
||||
{
|
||||
Square sq = pop_lsb(toAdd);
|
||||
added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
const Piece piece = make_piece(c, pt);
|
||||
const Bitboard oldBB =
|
||||
entry.byColorBB[Perspective][c] & entry.byTypeBB[Perspective][pt];
|
||||
const Bitboard newBB = pos.pieces(c, pt);
|
||||
Bitboard toRemove = oldBB & ~newBB;
|
||||
Bitboard toAdd = newBB & ~oldBB;
|
||||
|
||||
while (toRemove)
|
||||
{
|
||||
Square sq = pop_lsb(toRemove);
|
||||
removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
while (toAdd)
|
||||
{
|
||||
Square sq = pop_lsb(toAdd);
|
||||
added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto& accumulator = pos.state()->*accPtr;
|
||||
accumulator.computed[Perspective] = !psqtOnly;
|
||||
accumulator.computedPSQT[Perspective] = true;
|
||||
|
||||
#ifdef VECTOR
|
||||
vec_t acc[NumRegs];
|
||||
psqt_vec_t psqt[NumPsqtRegs];
|
||||
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
auto entryTile =
|
||||
reinterpret_cast<vec_t*>(&entry.accumulation[Perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = entryTile[k];
|
||||
|
||||
for (int i = 0; i < int(added.size()); ++i)
|
||||
if (!psqtOnly)
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
auto entryTile =
|
||||
reinterpret_cast<vec_t*>(&entry.accumulation[Perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = entryTile[k];
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
for (int i = 0; i < int(added.size()); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
for (int i = 0; i < int(removed.size()); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < NumRegs; k++)
|
||||
vec_store(&entryTile[k], acc[k]);
|
||||
}
|
||||
for (int i = 0; i < int(removed.size()); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < NumRegs; k++)
|
||||
vec_store(&entryTile[k], acc[k]);
|
||||
}
|
||||
|
||||
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
|
||||
{
|
||||
|
@ -760,18 +769,24 @@ class FeatureTransformer {
|
|||
|
||||
for (const auto index : added)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[Perspective][j] += weights[offset + j];
|
||||
if (!psqtOnly)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[Perspective][j] += weights[offset + j];
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
for (const auto index : removed)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[Perspective][j] -= weights[offset + j];
|
||||
if (!psqtOnly)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[Perspective][j] -= weights[offset + j];
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
|
||||
|
@ -782,144 +797,20 @@ class FeatureTransformer {
|
|||
// The accumulator of the refresh entry has been updated.
|
||||
// Now copy its content to the actual accumulator we were refreshing
|
||||
|
||||
if (!psqtOnly)
|
||||
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation[Perspective],
|
||||
sizeof(BiasType) * HalfDimensions);
|
||||
|
||||
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation[Perspective],
|
||||
sizeof(int32_t) * PSQTBuckets);
|
||||
|
||||
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation[Perspective],
|
||||
sizeof(BiasType) * HalfDimensions);
|
||||
|
||||
for (Color c : {WHITE, BLACK})
|
||||
entry.byColorBB[Perspective][c] = pos.pieces(c);
|
||||
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
entry.byTypeBB[Perspective][pt] = pos.pieces(pt);
|
||||
}
|
||||
|
||||
template<Color Perspective>
|
||||
void
|
||||
update_accumulator_refresh(const Position& pos,
|
||||
[[maybe_unused]] AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||
bool psqtOnly) const {
|
||||
|
||||
// When we are refreshing the accumulator of the big net,
|
||||
// redirect to the version of refresh that uses the refresh table.
|
||||
// Using the cache for the small net is not beneficial.
|
||||
if constexpr (HalfDimensions == Eval::NNUE::TransformedFeatureDimensionsBig)
|
||||
{
|
||||
update_accumulator_refresh_cache<Perspective>(pos, cache);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[NumRegs];
|
||||
psqt_vec_t psqt[NumPsqtRegs];
|
||||
#endif
|
||||
|
||||
// Refresh the accumulator
|
||||
// Could be extracted to a separate function because it's done in 2 places,
|
||||
// but it's unclear if compilers would correctly handle register allocation.
|
||||
auto& accumulator = pos.state()->*accPtr;
|
||||
accumulator.computed[Perspective] = !psqtOnly;
|
||||
accumulator.computedPSQT[Perspective] = true;
|
||||
FeatureSet::IndexList active;
|
||||
FeatureSet::append_active_indices<Perspective>(pos, active);
|
||||
|
||||
#ifdef VECTOR
|
||||
if (!psqtOnly)
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(&biases[j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
|
||||
int i = 0;
|
||||
for (; i < int(active.size()) - 1; i += 2)
|
||||
{
|
||||
IndexType index0 = active[i];
|
||||
IndexType index1 = active[i + 1];
|
||||
const IndexType offset0 = HalfDimensions * index0 + j * TileHeight;
|
||||
const IndexType offset1 = HalfDimensions * index1 + j * TileHeight;
|
||||
auto column0 = reinterpret_cast<const vec_t*>(&weights[offset0]);
|
||||
auto column1 = reinterpret_cast<const vec_t*>(&weights[offset1]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], vec_add_16(column0[k], column1[k]));
|
||||
}
|
||||
for (; i < int(active.size()); ++i)
|
||||
{
|
||||
IndexType index = active[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
auto accTile =
|
||||
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
|
||||
for (unsigned k = 0; k < NumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
|
||||
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
|
||||
{
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_zero_psqt();
|
||||
|
||||
int i = 0;
|
||||
for (; i < int(active.size()) - 1; i += 2)
|
||||
{
|
||||
IndexType index0 = active[i];
|
||||
IndexType index1 = active[i + 1];
|
||||
const IndexType offset0 = PSQTBuckets * index0 + j * PsqtTileHeight;
|
||||
const IndexType offset1 = PSQTBuckets * index1 + j * PsqtTileHeight;
|
||||
auto columnPsqt0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset0]);
|
||||
auto columnPsqt1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset1]);
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] =
|
||||
vec_add_psqt_32(psqt[k], vec_add_psqt_32(columnPsqt0[k], columnPsqt1[k]));
|
||||
}
|
||||
for (; i < int(active.size()); ++i)
|
||||
{
|
||||
IndexType index = active[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
|
||||
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
||||
&accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
||||
}
|
||||
|
||||
#else
|
||||
if (!psqtOnly)
|
||||
std::memcpy(accumulator.accumulation[Perspective], biases,
|
||||
HalfDimensions * sizeof(BiasType));
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
accumulator.psqtAccumulation[Perspective][k] = 0;
|
||||
|
||||
for (const auto index : active)
|
||||
{
|
||||
if (!psqtOnly)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
accumulator.accumulation[Perspective][j] += weights[offset + j];
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
accumulator.psqtAccumulation[Perspective][k] +=
|
||||
psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
#endif
|
||||
entry.psqtOnly = psqtOnly;
|
||||
}
|
||||
|
||||
template<Color Perspective>
|
||||
|
@ -948,7 +839,7 @@ class FeatureTransformer {
|
|||
psqtOnly);
|
||||
}
|
||||
else
|
||||
update_accumulator_refresh<Perspective>(pos, cache, psqtOnly);
|
||||
update_accumulator_refresh_cache<Perspective>(pos, cache, psqtOnly);
|
||||
}
|
||||
|
||||
template<Color Perspective>
|
||||
|
@ -976,7 +867,7 @@ class FeatureTransformer {
|
|||
psqtOnly);
|
||||
}
|
||||
else
|
||||
update_accumulator_refresh<Perspective>(pos, cache, psqtOnly);
|
||||
update_accumulator_refresh_cache<Perspective>(pos, cache, psqtOnly);
|
||||
}
|
||||
|
||||
template<IndexType Size>
|
||||
|
|
|
@ -48,7 +48,7 @@ void hint_common_parent_position(const Position& pos,
|
|||
|
||||
int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move()));
|
||||
if (simpleEvalAbs > Eval::SmallNetThreshold)
|
||||
networks.small.hint_common_access(pos, nullptr, simpleEvalAbs > Eval::PsqtOnlyThreshold);
|
||||
networks.small.hint_common_access(pos, &caches.small, simpleEvalAbs > Eval::PsqtOnlyThreshold);
|
||||
else
|
||||
networks.big.hint_common_access(pos, &caches.big, false);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue