1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-30 08:43:09 +00:00

More incremental accumulator updates

This patch was inspired by c065abd which updates the accumulator,
if possible, based on the accumulator of two plies back if
the accumulator of the preceding ply is not available.

With this patch we look back even further in the position history
in an attempt to reduce the number of complete recomputations.
When we find a usable accumulator for the position N plies back,
we also update the accumulator of the position N-1 plies back
because that accumulator is most likely to be helpful later
when evaluating positions in sibling branches.
By not updating all intermediate accumulators immediately,
we avoid doing too much work that is not certain to be useful.
Overall, roughly 2-3% speedup.

This patch makes the code more specific to the net architecture,
changing input features of the net will require additional changes
to the incremental update code as discussed in the PR #3193 and #3191.

Passed STC:
https://tests.stockfishchess.org/tests/view/5f9056712c92c7fe3a8c60d0
LLR: 2.94 (-2.94,2.94) {-0.25,1.25}
Total: 10040 W: 1116 L: 968 D: 7956
Ptnml(0-2): 42, 722, 3365, 828, 63

closes https://github.com/official-stockfish/Stockfish/pull/3193

No functional change.
This commit is contained in:
syzygy1 2020-10-20 21:06:06 +02:00 committed by Joost VandeVondele
parent 258af8ae44
commit 2046d5da30
4 changed files with 157 additions and 261 deletions

View file

@ -43,90 +43,6 @@ namespace Eval::NNUE::Features {
template <typename Derived>
class FeatureSetBase {
public:
// Get a list of indices for active features
template <typename IndexListType>
static void AppendActiveIndices(
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
for (Color perspective : { WHITE, BLACK }) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &active[perspective]);
}
}
// Get a list of indices for recently changed features
template <typename PositionType, typename IndexListType>
static void AppendChangedIndices(
const PositionType& pos, TriggerEvent trigger,
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
auto collect_for_one = [&](const DirtyPiece& dp) {
for (Color perspective : { WHITE, BLACK }) {
switch (trigger) {
case TriggerEvent::kFriendKingMoved:
reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
break;
default:
assert(false);
break;
}
if (reset[perspective]) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &added[perspective]);
} else {
Derived::CollectChangedIndices(
pos, dp, trigger, perspective,
&removed[perspective], &added[perspective]);
}
}
};
auto collect_for_two = [&](const DirtyPiece& dp1, const DirtyPiece& dp2) {
for (Color perspective : { WHITE, BLACK }) {
switch (trigger) {
case TriggerEvent::kFriendKingMoved:
reset[perspective] = dp1.piece[0] == make_piece(perspective, KING)
|| dp2.piece[0] == make_piece(perspective, KING);
break;
default:
assert(false);
break;
}
if (reset[perspective]) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &added[perspective]);
} else {
Derived::CollectChangedIndices(
pos, dp1, trigger, perspective,
&removed[perspective], &added[perspective]);
Derived::CollectChangedIndices(
pos, dp2, trigger, perspective,
&removed[perspective], &added[perspective]);
}
}
};
if (pos.state()->previous->accumulator.computed_accumulation) {
const auto& prev_dp = pos.state()->dirtyPiece;
if (prev_dp.dirty_num == 0) return;
collect_for_one(prev_dp);
} else {
const auto& prev_dp = pos.state()->previous->dirtyPiece;
if (prev_dp.dirty_num == 0) {
const auto& prev2_dp = pos.state()->dirtyPiece;
if (prev2_dp.dirty_num == 0) return;
collect_for_one(prev2_dp);
} else {
const auto& prev2_dp = pos.state()->dirtyPiece;
if (prev2_dp.dirty_num == 0) {
collect_for_one(prev_dp);
} else {
collect_for_two(prev_dp, prev2_dp);
}
}
}
}
};
// Class template that represents the feature set
@ -146,30 +62,6 @@ namespace Eval::NNUE::Features {
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
private:
// Get a list of indices for active features
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const active) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendActiveIndices(pos, perspective, active);
}
}
// Get a list of indices for recently changed features
static void CollectChangedIndices(
const Position& pos, const DirtyPiece& dp, const TriggerEvent trigger, const Color perspective,
IndexList* const removed, IndexList* const added) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendChangedIndices(pos, dp, perspective, removed, added);
}
}
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
} // namespace Eval::NNUE::Features

View file

@ -25,11 +25,14 @@
namespace Eval::NNUE {
// The accumulator of a StateInfo without parent is set to the INIT state
enum AccumulatorState { EMPTY, COMPUTED, INIT };
// Class that holds the result of affine transformation of input features
struct alignas(kCacheLineSize) Accumulator {
std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
bool computed_accumulation;
AccumulatorState state[2];
};
} // namespace Eval::NNUE

View file

@ -32,7 +32,7 @@ namespace Eval::NNUE {
// If vector instructions are enabled, we update and refresh the
// accumulator tile by tile such that each tile fits in the CPU's
// vector registers.
#define TILING
#define VECTOR
#ifdef USE_AVX512
typedef __m512i vec_t;
@ -75,7 +75,7 @@ namespace Eval::NNUE {
static constexpr IndexType kNumRegs = 16;
#else
#undef TILING
#undef VECTOR
#endif
@ -86,7 +86,7 @@ namespace Eval::NNUE {
// Number of output dimensions for one side
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
#ifdef TILING
#ifdef VECTOR
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
#endif
@ -119,32 +119,11 @@ namespace Eval::NNUE {
return !stream.fail();
}
// Proceed with the difference calculation if possible
bool UpdateAccumulatorIfPossible(const Position& pos) const {
const auto now = pos.state();
if (now->accumulator.computed_accumulation)
return true;
const auto prev = now->previous;
if (prev) {
if (prev->accumulator.computed_accumulation) {
UpdateAccumulator(pos);
return true;
} else if (prev->previous && prev->previous->accumulator.computed_accumulation) {
UpdateAccumulator(pos);
return true;
}
}
return false;
}
// Convert input features
void Transform(const Position& pos, OutputType* output) const {
if (!UpdateAccumulatorIfPossible(pos))
RefreshAccumulator(pos);
UpdateAccumulator(pos, WHITE);
UpdateAccumulator(pos, BLACK);
const auto& accumulation = pos.state()->accumulator.accumulation;
@ -240,153 +219,172 @@ namespace Eval::NNUE {
}
private:
// Calculate cumulative value without using difference calculation
void RefreshAccumulator(const Position& pos) const {
void UpdateAccumulator(const Position& pos, const Color c) const {
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
Features::IndexList active_indices[2];
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices);
for (Color perspective : { WHITE, BLACK }) {
#ifdef TILING
for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
#ifdef VECTOR
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
// is defined in the VECTOR code below, once in each branch
vec_t acc[kNumRegs];
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
for (unsigned k = 0; k < kNumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
#else
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
#endif
}
#if defined(USE_MMX)
_mm_empty();
#endif
accumulator.computed_accumulation = true;
// Look for a usable accumulator of an earlier position. We keep track
// of the estimated gain in terms of features to be added/subtracted.
StateInfo *st = pos.state(), *next = nullptr;
int gain = popcount(pos.pieces()) - 2;
while (st->accumulator.state[c] == EMPTY)
{
auto& dp = st->dirtyPiece;
// The first condition tests whether an incremental update is
// possible at all: if this side's king has moved, it is not possible.
static_assert(std::is_same_v<RawFeatures::SortedTriggerSet,
Features::CompileTimeList<Features::TriggerEvent, Features::TriggerEvent::kFriendKingMoved>>,
"Current code assumes that only kFriendlyKingMoved refresh trigger is being used.");
if ( dp.piece[0] == make_piece(c, KING)
|| (gain -= dp.dirty_num + 1) < 0)
break;
next = st;
st = st->previous;
}
// Calculate cumulative value using difference calculation
void UpdateAccumulator(const Position& pos) const {
if (st->accumulator.state[c] == COMPUTED)
{
if (next == nullptr)
return;
Accumulator* prev_accumulator;
assert(pos.state()->previous);
if (pos.state()->previous->accumulator.computed_accumulation) {
prev_accumulator = &pos.state()->previous->accumulator;
}
else {
assert(pos.state()->previous->previous);
assert(pos.state()->previous->previous->accumulator.computed_accumulation);
prev_accumulator = &pos.state()->previous->previous->accumulator;
}
// Update incrementally in two steps. First, we update the "next"
// accumulator. Then, we update the current accumulator (pos.state()).
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2] = { false, false };
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
// Gather all features to be updated. This code assumes HalfKP features
// only and doesn't support refresh triggers.
static_assert(std::is_same_v<Features::FeatureSet<Features::HalfKP<Features::Side::kFriend>>,
RawFeatures>);
Features::IndexList removed[2], added[2];
Features::HalfKP<Features::Side::kFriend>::AppendChangedIndices(pos,
next->dirtyPiece, c, &removed[0], &added[0]);
for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
Features::HalfKP<Features::Side::kFriend>::AppendChangedIndices(pos,
st2->dirtyPiece, c, &removed[1], &added[1]);
#ifdef TILING
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
for (Color perspective : { WHITE, BLACK }) {
// Mark the accumulators as computed.
next->accumulator.state[c] = COMPUTED;
pos.state()->accumulator.state[c] = COMPUTED;
// Now update the accumulators listed in info[], where the last element is a sentinel.
StateInfo *info[3] =
{ next, next == pos.state() ? nullptr : pos.state(), nullptr };
#ifdef VECTOR
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j)
{
// Load accumulator
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
vec_t acc[kNumRegs];
if (reset[perspective]) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
auto prevAccTile = reinterpret_cast<const vec_t*>(
&prev_accumulator->accumulation[perspective][i][j * kTileHeight]);
&st->accumulator.accumulation[c][0][j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_load(&prevAccTile[k]);
acc[k] = vec_load(&accTile[k]);
for (IndexType i = 0; info[i]; ++i)
{
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
for (const auto index : removed[i])
{
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
// Difference calculation for the activated features
for (const auto index : added[i])
{
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
}
// Store accumulator
accTile = reinterpret_cast<vec_t*>(
&info[i]->accumulator.accumulation[c][0][j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
vec_store(&accTile[k], acc[k]);
}
}
#else
for (IndexType i = 0; info[i]; ++i)
{
std::memcpy(info[i]->accumulator.accumulation[c][0],
st->accumulator.accumulation[c][0],
kHalfDimensions * sizeof(BiasType));
st = info[i];
// Difference calculation for the deactivated features
for (const auto index : removed[i])
{
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
st->accumulator.accumulation[c][0][j] -= weights_[offset + j];
}
// Difference calculation for the activated features
for (const auto index : added[i])
{
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
st->accumulator.accumulation[c][0][j] += weights_[offset + j];
}
}
#endif
}
else
{
// Refresh the accumulator
auto& accumulator = pos.state()->accumulator;
accumulator.state[c] = COMPUTED;
Features::IndexList active;
Features::HalfKP<Features::Side::kFriend>::AppendActiveIndices(pos, c, &active);
#ifdef VECTOR
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j)
{
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
for (const auto index : active)
{
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[c][0][j * kTileHeight]);
for (unsigned k = 0; k < kNumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
#else
std::memcpy(accumulator.accumulation[c][0], biases_,
kHalfDimensions * sizeof(BiasType));
for (const auto index : active)
{
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[c][0][j] += weights_[offset + j];
}
#endif
}
#if defined(USE_MMX)
_mm_empty();
#endif
#else
for (Color perspective : { WHITE, BLACK }) {
if (reset[perspective]) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memcpy(accumulator.accumulation[perspective][i],
prev_accumulator->accumulation[perspective][i],
kHalfDimensions * sizeof(BiasType));
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
}
}
#endif
accumulator.computed_accumulation = true;
}
using BiasType = std::int16_t;

View file

@ -279,6 +279,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
chess960 = isChess960;
thisThread = th;
set_state(st);
st->accumulator.state[WHITE] = Eval::NNUE::INIT;
st->accumulator.state[BLACK] = Eval::NNUE::INIT;
assert(pos_is_ok());
@ -703,7 +705,8 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
++st->pliesFromNull;
// Used by NNUE
st->accumulator.computed_accumulation = false;
st->accumulator.state[WHITE] = Eval::NNUE::EMPTY;
st->accumulator.state[BLACK] = Eval::NNUE::EMPTY;
auto& dp = st->dirtyPiece;
dp.dirty_num = 1;
@ -996,16 +999,16 @@ void Position::do_null_move(StateInfo& newSt) {
assert(!checkers());
assert(&newSt != st);
if (Eval::useNNUE)
{
std::memcpy(&newSt, st, sizeof(StateInfo));
}
else
std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
newSt.previous = st;
st = &newSt;
st->dirtyPiece.dirty_num = 0;
st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator()
st->accumulator.state[WHITE] = Eval::NNUE::EMPTY;
st->accumulator.state[BLACK] = Eval::NNUE::EMPTY;
if (st->epSquare != SQ_NONE)
{
st->key ^= Zobrist::enpassant[file_of(st->epSquare)];