1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-29 16:23:09 +00:00

Optimize update_accumulator_refresh_cache()

Passed STC: https://tests.stockfishchess.org/tests/view/664105df26ac5f9b286d30e6
LLR: 2.94 (-2.94,2.94) <0.00,2.00>
Total: 178528 W: 46235 L: 45750 D: 86543
Ptnml(0-2): 505, 17792, 52142, 18363, 462

Combination of two earlier speedups that each tested yellow:
https://tests.stockfishchess.org/tests/view/6640abf9d163897c63214f5c
LLR: -2.93 (-2.94,2.94) <0.00,2.00>
Total: 355744 W: 91714 L: 91470 D: 172560
Ptnml(0-2): 913, 36233, 103384, 36381, 961

https://tests.stockfishchess.org/tests/view/6628ce073fe04ce4cefc739c
LLR: -2.93 (-2.94,2.94) <0.00,2.00>
Total: 627040 W: 162001 L: 161339 D: 303700
Ptnml(0-2): 2268, 72379, 163532, 73105, 2236

closes https://github.com/official-stockfish/Stockfish/pull/5239

No functional change
This commit is contained in:
mstembera 2024-05-12 04:45:01 -07:00 committed by Joost VandeVondele
parent 0b08953174
commit e608eab8dd

View file

@ -664,7 +664,11 @@ class FeatureTransformer {
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
{ {
auto entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]); auto accTile =
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
auto entryTile =
reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]);
for (IndexType k = 0; k < NumRegs; ++k) for (IndexType k = 0; k < NumRegs; ++k)
acc[k] = entryTile[k]; acc[k] = entryTile[k];
@ -679,7 +683,7 @@ class FeatureTransformer {
auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]); auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
for (unsigned k = 0; k < NumRegs; ++k) for (unsigned k = 0; k < NumRegs; ++k)
acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]), columnA[k]); acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
} }
for (; i < int(removed.size()); ++i) for (; i < int(removed.size()); ++i)
{ {
@ -702,12 +706,17 @@ class FeatureTransformer {
for (IndexType k = 0; k < NumRegs; k++) for (IndexType k = 0; k < NumRegs; k++)
vec_store(&entryTile[k], acc[k]); vec_store(&entryTile[k], acc[k]);
for (IndexType k = 0; k < NumRegs; k++)
vec_store(&accTile[k], acc[k]);
} }
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
{ {
auto entryTilePsqt = auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * PsqtTileHeight]); &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
auto entryTilePsqt = reinterpret_cast<psqt_vec_t*>(
&entry.psqtAccumulation[j * PsqtTileHeight]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k) for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = entryTilePsqt[k]; psqt[k] = entryTilePsqt[k];
@ -732,6 +741,8 @@ class FeatureTransformer {
for (std::size_t k = 0; k < NumPsqtRegs; ++k) for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&entryTilePsqt[k], psqt[k]); vec_store_psqt(&entryTilePsqt[k], psqt[k]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&accTilePsqt[k], psqt[k]);
} }
#else #else
@ -755,8 +766,6 @@ class FeatureTransformer {
entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k]; entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k];
} }
#endif
// The accumulator of the refresh entry has been updated. // The accumulator of the refresh entry has been updated.
// Now copy its content to the actual accumulator we were refreshing // Now copy its content to the actual accumulator we were refreshing
@ -765,6 +774,7 @@ class FeatureTransformer {
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation, std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
sizeof(int32_t) * PSQTBuckets); sizeof(int32_t) * PSQTBuckets);
#endif
for (Color c : {WHITE, BLACK}) for (Color c : {WHITE, BLACK})
entry.byColorBB[c] = pos.pieces(c); entry.byColorBB[c] = pos.pieces(c);