mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
Update nnue_feature_transformer.h
Unroll update_accumulator_refresh to process two active indices simultaneously. The compiler might not unroll the loop effectively because the number of active indices isn't known at compile time.
STC: https://tests.stockfishchess.org/tests/view/65faa8850ec64f0526c4fca9
LLR: 2.93 (-2.94,2.94) <0.00,2.00>
Total: 130464 W: 33882 L: 33431 D: 63151
Ptnml(0-2): 539, 14591, 34501, 15082, 519
closes https://github.com/official-stockfish/Stockfish/pull/5125
No functional change.
This commit is contained in:
parent
d99f89506b
commit
5001d49f42
1 changed file with 31 additions and 2 deletions
|
@ -619,8 +619,22 @@ class FeatureTransformer {
|
|||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
|
||||
for (const auto index : active)
|
||||
int i = 0;
|
||||
for (; i < int(active.size()) - 1; i += 2)
|
||||
{
|
||||
IndexType index0 = active[i];
|
||||
IndexType index1 = active[i + 1];
|
||||
const IndexType offset0 = HalfDimensions * index0 + j * TileHeight;
|
||||
const IndexType offset1 = HalfDimensions * index1 + j * TileHeight;
|
||||
auto column0 = reinterpret_cast<const vec_t*>(&weights[offset0]);
|
||||
auto column1 = reinterpret_cast<const vec_t*>(&weights[offset1]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], vec_add_16(column0[k], column1[k]));
|
||||
}
|
||||
for (; i < int(active.size()); ++i)
|
||||
{
|
||||
IndexType index = active[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
|
@ -639,8 +653,23 @@ class FeatureTransformer {
|
|||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_zero_psqt();
|
||||
|
||||
for (const auto index : active)
|
||||
int i = 0;
|
||||
for (; i < int(active.size()) - 1; i += 2)
|
||||
{
|
||||
IndexType index0 = active[i];
|
||||
IndexType index1 = active[i + 1];
|
||||
const IndexType offset0 = PSQTBuckets * index0 + j * PsqtTileHeight;
|
||||
const IndexType offset1 = PSQTBuckets * index1 + j * PsqtTileHeight;
|
||||
auto columnPsqt0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset0]);
|
||||
auto columnPsqt1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset1]);
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] =
|
||||
vec_add_psqt_32(psqt[k], vec_add_psqt_32(columnPsqt0[k], columnPsqt1[k]));
|
||||
}
|
||||
for (; i < int(active.size()); ++i)
|
||||
{
|
||||
IndexType index = active[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue