mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
Implement accumulator refresh table
For each thread, persist an accumulator cache for the network, where each cache contains multiple entries for each of the possible king squares. When the accumulator needs to be refreshed, the cached entry is used to more efficiently update the accumulator, instead of rebuilding it from scratch. This idea was first described by Luecx (author of Koivisto) and is commonly referred to as "Finny Tables". When the accumulator needs to be refreshed, instead of filling it with biases and adding every piece from scratch, we: 1. Take the `AccumulatorRefreshEntry` associated with the new king bucket. 2. Calculate the features to activate and deactivate (from differences between bitboards in the entry and bitboards of the actual position). 3. Apply the updates on the refresh entry. 4. Copy the content of the refresh entry accumulator to the accumulator we were refreshing. 5. Copy the bitboards from the position to the refresh entry, to match the newly updated accumulator. Results at STC: https://tests.stockfishchess.org/tests/view/662301573fe04ce4cefc1386 (first version) https://tests.stockfishchess.org/tests/view/6627fa063fe04ce4cefc6560 (final) Non-Regression between first and final: https://tests.stockfishchess.org/tests/view/662801e33fe04ce4cefc660a STC SMP: https://tests.stockfishchess.org/tests/view/662808133fe04ce4cefc667c closes https://github.com/official-stockfish/Stockfish/pull/5183 No functional change
This commit is contained in:
parent
fcba524793
commit
49ef4c935a
12 changed files with 349 additions and 80 deletions
|
@ -25,12 +25,14 @@
|
|||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
|
||||
#include "nnue/network.h"
|
||||
#include "nnue/nnue_misc.h"
|
||||
#include "position.h"
|
||||
#include "types.h"
|
||||
#include "uci.h"
|
||||
#include "nnue/nnue_accumulator.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
|
@ -45,7 +47,10 @@ int Eval::simple_eval(const Position& pos, Color c) {
|
|||
|
||||
// Evaluate is the evaluator for the outer world. It returns a static evaluation
|
||||
// of the position from the point of view of the side to move.
|
||||
Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos, int optimism) {
|
||||
Value Eval::evaluate(const Eval::NNUE::Networks& networks,
|
||||
const Position& pos,
|
||||
Eval::NNUE::AccumulatorCaches& caches,
|
||||
int optimism) {
|
||||
|
||||
assert(!pos.checkers());
|
||||
|
||||
|
@ -55,8 +60,8 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos,
|
|||
int nnueComplexity;
|
||||
int v;
|
||||
|
||||
Value nnue = smallNet ? networks.small.evaluate(pos, true, &nnueComplexity, psqtOnly)
|
||||
: networks.big.evaluate(pos, true, &nnueComplexity, false);
|
||||
Value nnue = smallNet ? networks.small.evaluate(pos, nullptr, true, &nnueComplexity, psqtOnly)
|
||||
: networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false);
|
||||
|
||||
const auto adjustEval = [&](int optDiv, int nnueDiv, int npmDiv, int pawnCountConstant,
|
||||
int pawnCountMul, int npmConstant, int evalDiv,
|
||||
|
@ -94,20 +99,22 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos,
|
|||
// Trace scores are from white's point of view
|
||||
// Builds a human-readable evaluation report for `pos`: the NNUE trace, the raw
// big-network evaluation and the final evaluation, all reported from white's
// point of view. Returns a fixed message instead when the side to move is in check.
//
// The accumulator caches used here are local to this call, so they must be
// initialized here as well (search threads do this in Worker::start_searching).
std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
||||
|
||||
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>();
|
||||
// make_unique value-initializes the caches, i.e. every refresh entry starts
// with an all-zero accumulator. Entries must start from the network biases
// (see AccumulatorCaches::Cache::Entry::clear), otherwise the first refresh
// through the cache produces an accumulator that is missing the biases.
caches->clear(networks);
|
||||
|
||||
if (pos.checkers())
|
||||
return "Final evaluation: none (in check)";
|
||||
|
||||
std::stringstream ss;
|
||||
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
|
||||
ss << '\n' << NNUE::trace(pos, networks) << '\n';
|
||||
ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n';
|
||||
|
||||
ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
|
||||
|
||||
Value v = networks.big.evaluate(pos, false);
|
||||
Value v = networks.big.evaluate(pos, &caches->big, false);
|
||||
// Network output is from the side to move's perspective; flip it for black
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";
|
||||
|
||||
v = evaluate(networks, pos, VALUE_ZERO);
|
||||
v = evaluate(networks, pos, *caches, VALUE_ZERO);
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
|
||||
ss << " [with scaled NNUE, ...]";
|
||||
|
|
|
@ -40,14 +40,16 @@ constexpr inline int SmallNetThreshold = 1274, PsqtOnlyThreshold = 2389;
|
|||
|
||||
namespace NNUE {
|
||||
struct Networks;
|
||||
struct AccumulatorCaches;
|
||||
}
|
||||
|
||||
std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
|
||||
|
||||
int simple_eval(const Position& pos, Color c);
|
||||
Value evaluate(const NNUE::Networks& networks, const Position& pos, int optimism);
|
||||
|
||||
|
||||
Value evaluate(const NNUE::Networks& networks,
|
||||
const Position& pos,
|
||||
Eval::NNUE::AccumulatorCaches& caches,
|
||||
int optimism);
|
||||
} // namespace Eval
|
||||
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include "../../bitboard.h"
|
||||
#include "../../position.h"
|
||||
#include "../../types.h"
|
||||
#include "../nnue_common.h"
|
||||
#include "../nnue_accumulator.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
|
@ -49,6 +49,8 @@ void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active)
|
|||
// Explicit template instantiations
|
||||
template void HalfKAv2_hm::append_active_indices<WHITE>(const Position& pos, IndexList& active);
|
||||
template void HalfKAv2_hm::append_active_indices<BLACK>(const Position& pos, IndexList& active);
|
||||
template IndexType HalfKAv2_hm::make_index<WHITE>(Square s, Piece pc, Square ksq);
|
||||
template IndexType HalfKAv2_hm::make_index<BLACK>(Square s, Piece pc, Square ksq);
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
template<Color Perspective>
|
||||
|
|
|
@ -63,10 +63,6 @@ class HalfKAv2_hm {
|
|||
{PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
|
||||
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}};
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
template<Color Perspective>
|
||||
static IndexType make_index(Square s, Piece pc, Square ksq);
|
||||
|
||||
public:
|
||||
// Feature name
|
||||
static constexpr const char* Name = "HalfKAv2_hm(Friend)";
|
||||
|
@ -126,6 +122,10 @@ class HalfKAv2_hm {
|
|||
static constexpr IndexType MaxActiveDimensions = 32;
|
||||
using IndexList = ValueList<IndexType, MaxActiveDimensions>;
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
template<Color Perspective>
|
||||
static IndexType make_index(Square s, Piece pc, Square ksq);
|
||||
|
||||
// Get a list of indices for active features
|
||||
template<Color Perspective>
|
||||
static void append_active_indices(const Position& pos, IndexList& active);
|
||||
|
|
|
@ -186,10 +186,11 @@ bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename
|
|||
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
Value Network<Arch, Transformer>::evaluate(const Position& pos,
|
||||
bool adjusted,
|
||||
int* complexity,
|
||||
bool psqtOnly) const {
|
||||
Value Network<Arch, Transformer>::evaluate(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache,
|
||||
bool adjusted,
|
||||
int* complexity,
|
||||
bool psqtOnly) const {
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
|
@ -197,20 +198,21 @@ Value Network<Arch, Transformer>::evaluate(const Position& pos,
|
|||
constexpr int delta = 24;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformedFeaturesUnaligned
|
||||
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
TransformedFeatureType
|
||||
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment) TransformedFeatureType transformedFeatures
|
||||
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
|
||||
alignas(alignment) TransformedFeatureType
|
||||
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
|
||||
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket, psqtOnly);
|
||||
const auto psqt =
|
||||
featureTransformer->transform(pos, cache, transformedFeatures, bucket, psqtOnly);
|
||||
const auto positional = !psqtOnly ? (network[bucket]->propagate(transformedFeatures)) : 0;
|
||||
|
||||
if (complexity)
|
||||
|
@ -255,26 +257,29 @@ void Network<Arch, Transformer>::verify(std::string evalfilePath) const {
|
|||
|
||||
|
||||
// Thin forwarder: hands the position, the (possibly null) accumulator refresh
// cache and the psqt-only flag to the feature transformer's hint_common_access.
// The cache type is tied to this network's transformed feature dimensions.
template<typename Arch, typename Transformer>
|
||||
void Network<Arch, Transformer>::hint_common_access(const Position& pos, bool psqtOnl) const {
|
||||
featureTransformer->hint_common_access(pos, psqtOnl);
|
||||
void Network<Arch, Transformer>::hint_common_access(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache,
|
||||
bool psqtOnl) const {
|
||||
featureTransformer->hint_common_access(pos, cache, psqtOnl);
|
||||
}
|
||||
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
NnueEvalTrace Network<Arch, Transformer>::trace_evaluate(const Position& pos) const {
|
||||
NnueEvalTrace
|
||||
Network<Arch, Transformer>::trace_evaluate(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const {
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
constexpr uint64_t alignment = CacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformedFeaturesUnaligned
|
||||
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
TransformedFeatureType
|
||||
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment) TransformedFeatureType transformedFeatures
|
||||
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
|
||||
alignas(alignment) TransformedFeatureType
|
||||
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
|
@ -284,7 +289,7 @@ NnueEvalTrace Network<Arch, Transformer>::trace_evaluate(const Position& pos) co
|
|||
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
|
||||
{
|
||||
const auto materialist =
|
||||
featureTransformer->transform(pos, transformedFeatures, bucket, false);
|
||||
featureTransformer->transform(pos, cache, transformedFeatures, bucket, false);
|
||||
const auto positional = network[bucket]->propagate(transformedFeatures);
|
||||
|
||||
t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
|
||||
|
|
|
@ -31,10 +31,10 @@
|
|||
#include "nnue_architecture.h"
|
||||
#include "nnue_feature_transformer.h"
|
||||
#include "nnue_misc.h"
|
||||
#include "nnue_accumulator.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
|
||||
enum class EmbeddedNNUEType {
|
||||
BIG,
|
||||
SMALL,
|
||||
|
@ -43,6 +43,8 @@ enum class EmbeddedNNUEType {
|
|||
|
||||
template<typename Arch, typename Transformer>
|
||||
class Network {
|
||||
static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions;
|
||||
|
||||
public:
|
||||
Network(EvalFile file, EmbeddedNNUEType type) :
|
||||
evalFile(file),
|
||||
|
@ -51,17 +53,20 @@ class Network {
|
|||
void load(const std::string& rootDirectory, std::string evalfilePath);
|
||||
bool save(const std::optional<std::string>& filename) const;
|
||||
|
||||
|
||||
Value evaluate(const Position& pos,
|
||||
bool adjusted = false,
|
||||
int* complexity = nullptr,
|
||||
bool psqtOnly = false) const;
|
||||
Value evaluate(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache,
|
||||
bool adjusted = false,
|
||||
int* complexity = nullptr,
|
||||
bool psqtOnly = false) const;
|
||||
|
||||
|
||||
void hint_common_access(const Position& pos, bool psqtOnl) const;
|
||||
void hint_common_access(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache,
|
||||
bool psqtOnl) const;
|
||||
|
||||
void verify(std::string evalfilePath) const;
|
||||
NnueEvalTrace trace_evaluate(const Position& pos) const;
|
||||
NnueEvalTrace trace_evaluate(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const;
|
||||
|
||||
private:
|
||||
void load_user_net(const std::string&, const std::string&);
|
||||
|
@ -89,6 +94,9 @@ class Network {
|
|||
|
||||
// Hash value of evaluation function structure
|
||||
static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value();
|
||||
|
||||
template<IndexType Size>
|
||||
friend struct AccumulatorCaches::Cache;
|
||||
};
|
||||
|
||||
// Definitions of the network types
|
||||
|
|
|
@ -28,13 +28,75 @@
|
|||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
using PSQTWeightType = std::int32_t;
|
||||
using IndexType = std::uint32_t;
|
||||
|
||||
// Class that holds the result of affine transformation of input features
|
||||
// Accumulator for a feature transformer with `Size` outputs per perspective.
// The first index of every member is the perspective (a Color):
//   accumulation     - `Size` 16-bit accumulated values per perspective
//   psqtAccumulation - one 32-bit PSQT sum per bucket, per perspective
//   computed /
//   computedPSQT     - whether the respective part is up to date for that perspective
// This change only replaces the literal 2 with COLOR_NB in the array bounds.
template<IndexType Size>
|
||||
struct alignas(CacheLineSize) Accumulator {
|
||||
std::int16_t accumulation[2][Size];
|
||||
std::int32_t psqtAccumulation[2][PSQTBuckets];
|
||||
bool computed[2];
|
||||
bool computedPSQT[2];
|
||||
std::int16_t accumulation[COLOR_NB][Size];
|
||||
std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
|
||||
bool computed[COLOR_NB];
|
||||
bool computedPSQT[COLOR_NB];
|
||||
};
|
||||
|
||||
|
||||
// AccumulatorCaches struct provides per-thread accumulator caches, where each
|
||||
// cache contains multiple entries for each of the possible king squares.
|
||||
// When the accumulator needs to be refreshed, the cached entry is used to more
|
||||
// efficiently update the accumulator, instead of rebuilding it from scratch.
|
||||
// This idea was first described by Luecx (author of Koivisto) and
|
||||
// is commonly referred to as "Finny Tables".
|
||||
//
// NOTE: no constructor is declared here, so a newly created AccumulatorCaches
// does not contain the network biases. clear() must be called with the
// networks in use before the caches are handed to an evaluation.
struct AccumulatorCaches {
|
||||
|
||||
// Cache for one network whose feature transformer has `Size` outputs
// per perspective.
template<IndexType Size>
|
||||
struct alignas(CacheLineSize) Cache {
|
||||
|
||||
// One refresh entry: a stored accumulator together with the piece
// placement it was computed for. The first index of every member is the
// perspective, so both perspectives are tracked independently.
struct alignas(CacheLineSize) Entry {
|
||||
BiasType accumulation[COLOR_NB][Size];
|
||||
PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets];
|
||||
// Occupancy the stored accumulator corresponds to: [perspective][color]
Bitboard byColorBB[COLOR_NB][COLOR_NB];
|
||||
// Occupancy the stored accumulator corresponds to: [perspective][piece type]
Bitboard byTypeBB[COLOR_NB][PIECE_TYPE_NB];
|
||||
|
||||
// To initialize a refresh entry, we set all its bitboards empty,
|
||||
// so we put the biases in the accumulation, without any weights on top
|
||||
// `biases` must point to at least `Size` values; the PSQT sums start at zero.
void clear(const BiasType* biases) {
|
||||
|
||||
std::memset(byColorBB, 0, sizeof(byColorBB));
|
||||
std::memset(byTypeBB, 0, sizeof(byTypeBB));
|
||||
|
||||
std::memcpy(accumulation[WHITE], biases, Size * sizeof(BiasType));
|
||||
std::memcpy(accumulation[BLACK], biases, Size * sizeof(BiasType));
|
||||
|
||||
std::memset(psqtAccumulation, 0, sizeof(psqtAccumulation));
|
||||
}
|
||||
};
|
||||
|
||||
// Resets every entry using the biases of `network`'s feature transformer.
template<typename Network>
|
||||
void clear(const Network& network) {
|
||||
for (auto& entry : entries)
|
||||
entry.clear(network.featureTransformer->biases);
|
||||
}
|
||||
|
||||
// Resets every entry using an explicit bias array.
void clear(const BiasType* biases) {
|
||||
for (auto& entry : entries)
|
||||
entry.clear(biases);
|
||||
}
|
||||
|
||||
// Entry lookup by square; update_accumulator_refresh_cache passes the
// king square of the perspective being refreshed.
Entry& operator[](Square sq) { return entries[sq]; }
|
||||
|
||||
std::array<Entry, SQUARE_NB> entries;
|
||||
};
|
||||
|
||||
// Resets all caches from the given networks (currently only the big net's cache).
template<typename Networks>
|
||||
void clear(const Networks& networks) {
|
||||
big.clear(networks.big);
|
||||
}
|
||||
|
||||
// When adding a new cache for a network, e.g. the smallnet,
|
||||
// the appropriate condition must be added to FeatureTransformer::update_accumulator_refresh.
|
||||
Cache<TransformedFeatureDimensionsBig> big;
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
|
|
@ -195,10 +195,10 @@ template<IndexType TransformedFeatureDimensions,
|
|||
Accumulator<TransformedFeatureDimensions> StateInfo::*accPtr>
|
||||
class FeatureTransformer {
|
||||
|
||||
private:
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
|
||||
|
||||
private:
|
||||
#ifdef VECTOR
|
||||
static constexpr int NumRegs =
|
||||
BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
|
||||
|
@ -306,10 +306,13 @@ class FeatureTransformer {
|
|||
}
|
||||
|
||||
// Convert input features
|
||||
std::int32_t
|
||||
transform(const Position& pos, OutputType* output, int bucket, bool psqtOnly) const {
|
||||
update_accumulator<WHITE>(pos, psqtOnly);
|
||||
update_accumulator<BLACK>(pos, psqtOnly);
|
||||
std::int32_t transform(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||
OutputType* output,
|
||||
int bucket,
|
||||
bool psqtOnly) const {
|
||||
update_accumulator<WHITE>(pos, cache, psqtOnly);
|
||||
update_accumulator<BLACK>(pos, cache, psqtOnly);
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
const auto& psqtAccumulation = (pos.state()->*accPtr).psqtAccumulation;
|
||||
|
@ -371,9 +374,11 @@ class FeatureTransformer {
|
|||
return psqt;
|
||||
} // end of function transform()
|
||||
|
||||
// Runs hint_common_access_for_perspective for both perspectives, first WHITE
// then BLACK, passing along the refresh cache pointer and the psqt-only flag.
void hint_common_access(const Position& pos, bool psqtOnly) const {
|
||||
hint_common_access_for_perspective<WHITE>(pos, psqtOnly);
|
||||
hint_common_access_for_perspective<BLACK>(pos, psqtOnly);
|
||||
void hint_common_access(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||
bool psqtOnly) const {
|
||||
hint_common_access_for_perspective<WHITE>(pos, cache, psqtOnly);
|
||||
hint_common_access_for_perspective<BLACK>(pos, cache, psqtOnly);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -650,7 +655,161 @@ class FeatureTransformer {
|
|||
}
|
||||
|
||||
// update_accumulator_refresh_cache: refreshes the accumulator of pos.state()
// for `Perspective` by way of the refresh cache instead of rebuilding it:
//   1. pick the cache entry for the perspective's king square,
//   2. diff the entry's stored piece placement against the position,
//   3. apply the resulting feature additions/removals to the entry,
//   4. copy the entry's accumulator into the position's accumulator,
//   5. store the position's piece placement in the entry.
// `cache` must not be null. There is no psqtOnly variant of this path: both the
// feature part and the PSQT part are always refreshed and marked as computed.
template<Color Perspective>
|
||||
void update_accumulator_refresh(const Position& pos, bool psqtOnly) const {
|
||||
void update_accumulator_refresh_cache(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
||||
assert(cache != nullptr);
|
||||
|
||||
Square ksq = pos.square<KING>(Perspective);
|
||||
|
||||
// The cache holds one entry per square; use the one for our king's square
auto& entry = (*cache)[ksq];
|
||||
|
||||
auto& accumulator = pos.state()->*accPtr;
|
||||
accumulator.computed[Perspective] = true;
|
||||
accumulator.computedPSQT[Perspective] = true;
|
||||
|
||||
// Collect the feature indices that differ between the entry and the position.
// For every (color, piece type): squares present only in the entry are
// removals, squares present only in the position are additions.
FeatureSet::IndexList removed, added;
|
||||
for (Color c : {WHITE, BLACK})
|
||||
{
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
{
|
||||
const Piece piece = make_piece(c, pt);
|
||||
const Bitboard oldBB =
|
||||
entry.byColorBB[Perspective][c] & entry.byTypeBB[Perspective][pt];
|
||||
const Bitboard newBB = pos.pieces(c, pt);
|
||||
Bitboard toRemove = oldBB & ~newBB;
|
||||
Bitboard toAdd = newBB & ~oldBB;
|
||||
|
||||
while (toRemove)
|
||||
{
|
||||
Square sq = pop_lsb(toRemove);
|
||||
removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
while (toAdd)
|
||||
{
|
||||
Square sq = pop_lsb(toAdd);
|
||||
added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef VECTOR
|
||||
vec_t acc[NumRegs];
|
||||
psqt_vec_t psqt[NumPsqtRegs];
|
||||
|
||||
// Feature part, processed in tiles of TileHeight values: load the entry's
// tile into registers, add the weight columns of all added features,
// subtract those of all removed features, then write the tile back to the entry.
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
auto entryTile =
|
||||
reinterpret_cast<vec_t*>(&entry.accumulation[Perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = entryTile[k];
|
||||
|
||||
for (int i = 0; i < int(added.size()); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
for (int i = 0; i < int(removed.size()); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < NumRegs; k++)
|
||||
vec_store(&entryTile[k], acc[k]);
|
||||
}
|
||||
|
||||
// PSQT part: same load / add / subtract / store scheme, in tiles of PsqtTileHeight
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
|
||||
{
|
||||
auto entryTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
||||
&entry.psqtAccumulation[Perspective][j * PsqtTileHeight]);
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = entryTilePsqt[k];
|
||||
|
||||
for (int i = 0; i < int(added.size()); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
for (int i = 0; i < int(removed.size()); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Scalar fallback: update the entry in place, one feature index at a time
for (const auto index : added)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[Perspective][j] += weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
for (const auto index : removed)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[Perspective][j] -= weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// The accumulator of the refresh entry has been updated.
|
||||
// Now copy its content to the actual accumulator we were refreshing
|
||||
|
||||
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation[Perspective],
|
||||
sizeof(int32_t) * PSQTBuckets);
|
||||
|
||||
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation[Perspective],
|
||||
sizeof(BiasType) * HalfDimensions);
|
||||
|
||||
// Remember the piece placement the entry now corresponds to, so the next
// refresh through this entry diffs against the current position.
for (Color c : {WHITE, BLACK})
|
||||
entry.byColorBB[Perspective][c] = pos.pieces(c);
|
||||
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
entry.byTypeBB[Perspective][pt] = pos.pieces(pt);
|
||||
}
|
||||
|
||||
template<Color Perspective>
|
||||
void
|
||||
update_accumulator_refresh(const Position& pos,
|
||||
[[maybe_unused]] AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||
bool psqtOnly) const {
|
||||
|
||||
// When we are refreshing the accumulator of the big net,
|
||||
// redirect to the version of refresh that uses the refresh table.
|
||||
// Using the cache for the small net is not beneficial.
|
||||
if constexpr (HalfDimensions == Eval::NNUE::TransformedFeatureDimensionsBig)
|
||||
{
|
||||
update_accumulator_refresh_cache<Perspective>(pos, cache);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
|
@ -764,7 +923,9 @@ class FeatureTransformer {
|
|||
}
|
||||
|
||||
template<Color Perspective>
|
||||
void hint_common_access_for_perspective(const Position& pos, bool psqtOnly) const {
|
||||
void hint_common_access_for_perspective(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||
bool psqtOnly) const {
|
||||
|
||||
// Works like update_accumulator, but performs less work.
|
||||
// Updates ONLY the accumulator for pos.
|
||||
|
@ -787,11 +948,13 @@ class FeatureTransformer {
|
|||
psqtOnly);
|
||||
}
|
||||
else
|
||||
update_accumulator_refresh<Perspective>(pos, psqtOnly);
|
||||
update_accumulator_refresh<Perspective>(pos, cache, psqtOnly);
|
||||
}
|
||||
|
||||
template<Color Perspective>
|
||||
void update_accumulator(const Position& pos, bool psqtOnly) const {
|
||||
void update_accumulator(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||
bool psqtOnly) const {
|
||||
|
||||
auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos, psqtOnly);
|
||||
|
||||
|
@ -813,9 +976,12 @@ class FeatureTransformer {
|
|||
psqtOnly);
|
||||
}
|
||||
else
|
||||
update_accumulator_refresh<Perspective>(pos, psqtOnly);
|
||||
update_accumulator_refresh<Perspective>(pos, cache, psqtOnly);
|
||||
}
|
||||
|
||||
template<IndexType Size>
|
||||
friend struct AccumulatorCaches::Cache;
|
||||
|
||||
alignas(CacheLineSize) BiasType biases[HalfDimensions];
|
||||
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
|
||||
alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
|
||||
|
|
|
@ -42,13 +42,15 @@ namespace Stockfish::Eval::NNUE {
|
|||
constexpr std::string_view PieceToChar(" PNBRQK pnbrqk");
|
||||
|
||||
|
||||
// Forwards the "common parent position" hint to the network selected by
// simple_eval: when |simple_eval| exceeds Eval::SmallNetThreshold the small net
// is hinted (psqt-only if it also exceeds Eval::PsqtOnlyThreshold), otherwise
// the big net. Only the big net gets a refresh cache; the small net is passed nullptr.
void hint_common_parent_position(const Position& pos, const Networks& networks) {
|
||||
void hint_common_parent_position(const Position& pos,
|
||||
const Networks& networks,
|
||||
AccumulatorCaches& caches) {
|
||||
|
||||
int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move()));
|
||||
if (simpleEvalAbs > Eval::SmallNetThreshold)
|
||||
networks.small.hint_common_access(pos, simpleEvalAbs > Eval::PsqtOnlyThreshold);
|
||||
networks.small.hint_common_access(pos, nullptr, simpleEvalAbs > Eval::PsqtOnlyThreshold);
|
||||
else
|
||||
networks.big.hint_common_access(pos, false);
|
||||
networks.big.hint_common_access(pos, &caches.big, false);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -104,7 +106,8 @@ void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& p
|
|||
|
||||
// Returns a string with the value of each piece on a board,
|
||||
// and a table for (PSQT, Layers) values bucket by bucket.
|
||||
std::string trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
||||
std::string
|
||||
trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::AccumulatorCaches& caches) {
|
||||
|
||||
std::stringstream ss;
|
||||
|
||||
|
@ -130,7 +133,7 @@ std::string trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
|||
|
||||
// We estimate the value of each piece by doing a differential evaluation from
|
||||
// the current base eval, simulating the removal of the piece from its square.
|
||||
Value base = networks.big.evaluate(pos);
|
||||
Value base = networks.big.evaluate(pos, &caches.big);
|
||||
base = pos.side_to_move() == WHITE ? base : -base;
|
||||
|
||||
for (File f = FILE_A; f <= FILE_H; ++f)
|
||||
|
@ -149,7 +152,7 @@ std::string trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
|||
st->accumulatorBig.computedPSQT[WHITE] = st->accumulatorBig.computedPSQT[BLACK] =
|
||||
false;
|
||||
|
||||
Value eval = networks.big.evaluate(pos);
|
||||
Value eval = networks.big.evaluate(pos, &caches.big);
|
||||
eval = pos.side_to_move() == WHITE ? eval : -eval;
|
||||
v = base - eval;
|
||||
|
||||
|
@ -167,7 +170,7 @@ std::string trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
|||
ss << board[row] << '\n';
|
||||
ss << '\n';
|
||||
|
||||
auto t = networks.big.trace_evaluate(pos);
|
||||
auto t = networks.big.trace_evaluate(pos, &caches.big);
|
||||
|
||||
ss << " NNUE network contributions "
|
||||
<< (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl
|
||||
|
|
|
@ -50,12 +50,13 @@ struct NnueEvalTrace {
|
|||
std::size_t correctBucket;
|
||||
};
|
||||
|
||||
|
||||
struct Networks;
|
||||
struct AccumulatorCaches;
|
||||
|
||||
|
||||
std::string trace(Position& pos, const Networks& networks);
|
||||
void hint_common_parent_position(const Position& pos, const Networks& networks);
|
||||
std::string trace(Position& pos, const Networks& networks, AccumulatorCaches& caches);
|
||||
void hint_common_parent_position(const Position& pos,
|
||||
const Networks& networks,
|
||||
AccumulatorCaches& caches);
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
} // namespace Stockfish
|
||||
|
|
|
@ -33,6 +33,8 @@
|
|||
#include "misc.h"
|
||||
#include "movegen.h"
|
||||
#include "movepick.h"
|
||||
#include "nnue/network.h"
|
||||
#include "nnue/nnue_accumulator.h"
|
||||
#include "nnue/nnue_common.h"
|
||||
#include "nnue/nnue_misc.h"
|
||||
#include "position.h"
|
||||
|
@ -135,6 +137,7 @@ Search::Worker::Worker(SharedState& sharedState,
|
|||
// Unpack the SharedState struct into member variables
|
||||
thread_idx(thread_id),
|
||||
manager(std::move(sm)),
|
||||
refreshTable(),
|
||||
options(sharedState.options),
|
||||
threads(sharedState.threads),
|
||||
tt(sharedState.tt),
|
||||
|
@ -143,6 +146,10 @@ Search::Worker::Worker(SharedState& sharedState,
|
|||
}
|
||||
|
||||
void Search::Worker::start_searching() {
|
||||
|
||||
// Initialize accumulator refresh entries
|
||||
refreshTable.clear(networks);
|
||||
|
||||
// Non-main threads go directly to iterative_deepening()
|
||||
if (!is_mainthread())
|
||||
{
|
||||
|
@ -564,7 +571,7 @@ Value Search::Worker::search(
|
|||
if (threads.stop.load(std::memory_order_relaxed) || pos.is_draw(ss->ply)
|
||||
|| ss->ply >= MAX_PLY)
|
||||
return (ss->ply >= MAX_PLY && !ss->inCheck)
|
||||
? evaluate(networks, pos, thisThread->optimism[us])
|
||||
? evaluate(networks, pos, refreshTable, thisThread->optimism[us])
|
||||
: value_draw(thisThread->nodes);
|
||||
|
||||
// Step 3. Mate distance pruning. Even if we mate at the next move our score
|
||||
|
@ -698,7 +705,7 @@ Value Search::Worker::search(
|
|||
{
|
||||
// Providing the hint that this node's accumulator will be used often
|
||||
// brings significant Elo gain (~13 Elo).
|
||||
Eval::NNUE::hint_common_parent_position(pos, networks);
|
||||
Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable);
|
||||
unadjustedStaticEval = eval = ss->staticEval;
|
||||
}
|
||||
else if (ss->ttHit)
|
||||
|
@ -706,9 +713,9 @@ Value Search::Worker::search(
|
|||
// Never assume anything about values stored in TT
|
||||
unadjustedStaticEval = tte->eval();
|
||||
if (unadjustedStaticEval == VALUE_NONE)
|
||||
unadjustedStaticEval = evaluate(networks, pos, thisThread->optimism[us]);
|
||||
unadjustedStaticEval = evaluate(networks, pos, refreshTable, thisThread->optimism[us]);
|
||||
else if (PvNode)
|
||||
Eval::NNUE::hint_common_parent_position(pos, networks);
|
||||
Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable);
|
||||
|
||||
ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos);
|
||||
|
||||
|
@ -718,7 +725,7 @@ Value Search::Worker::search(
|
|||
}
|
||||
else
|
||||
{
|
||||
unadjustedStaticEval = evaluate(networks, pos, thisThread->optimism[us]);
|
||||
unadjustedStaticEval = evaluate(networks, pos, refreshTable, thisThread->optimism[us]);
|
||||
ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos);
|
||||
|
||||
// Static evaluation is saved as it was before adjustment by correction history
|
||||
|
@ -875,7 +882,7 @@ Value Search::Worker::search(
|
|||
}
|
||||
}
|
||||
|
||||
Eval::NNUE::hint_common_parent_position(pos, networks);
|
||||
Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable);
|
||||
}
|
||||
|
||||
moves_loop: // When in check, search starts here
|
||||
|
@ -1413,7 +1420,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
// Step 2. Check for an immediate draw or maximum ply reached
|
||||
if (pos.is_draw(ss->ply) || ss->ply >= MAX_PLY)
|
||||
return (ss->ply >= MAX_PLY && !ss->inCheck)
|
||||
? evaluate(networks, pos, thisThread->optimism[us])
|
||||
? evaluate(networks, pos, refreshTable, thisThread->optimism[us])
|
||||
: VALUE_DRAW;
|
||||
|
||||
assert(0 <= ss->ply && ss->ply < MAX_PLY);
|
||||
|
@ -1445,7 +1452,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
// Never assume anything about values stored in TT
|
||||
unadjustedStaticEval = tte->eval();
|
||||
if (unadjustedStaticEval == VALUE_NONE)
|
||||
unadjustedStaticEval = evaluate(networks, pos, thisThread->optimism[us]);
|
||||
unadjustedStaticEval =
|
||||
evaluate(networks, pos, refreshTable, thisThread->optimism[us]);
|
||||
ss->staticEval = bestValue =
|
||||
to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos);
|
||||
|
||||
|
@ -1458,7 +1466,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
|
|||
{
|
||||
// In case of null move search, use previous static eval with a different sign
|
||||
unadjustedStaticEval = (ss - 1)->currentMove != Move::null()
|
||||
? evaluate(networks, pos, thisThread->optimism[us])
|
||||
? evaluate(networks, pos, refreshTable, thisThread->optimism[us])
|
||||
: -(ss - 1)->staticEval;
|
||||
ss->staticEval = bestValue =
|
||||
to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos);
|
||||
|
|
|
@ -26,9 +26,9 @@
|
|||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "misc.h"
|
||||
#include "movepick.h"
|
||||
|
@ -37,6 +37,7 @@
|
|||
#include "syzygy/tbprobe.h"
|
||||
#include "timeman.h"
|
||||
#include "types.h"
|
||||
#include "nnue/nnue_accumulator.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
|
@ -301,6 +302,10 @@ class Worker {
|
|||
|
||||
Tablebases::Config tbConfig;
|
||||
|
||||
// Used by NNUE
|
||||
|
||||
Eval::NNUE::AccumulatorCaches refreshTable;
|
||||
|
||||
const OptionsMap& options;
|
||||
ThreadPool& threads;
|
||||
TranspositionTable& tt;
|
||||
|
|
Loading…
Add table
Reference in a new issue