mirror of
https://github.com/sockspls/badfish
synced 2025-05-01 01:03:09 +00:00
Clean up and simplify some nnue code.
Remove some unnecessary code and its execution during inference. Also, the change on line 49 in nnue_architecture.h results in a more efficient SIMD code path through ClippedReLU::propagate(). Passed STC: https://tests.stockfishchess.org/tests/view/6217d3bfda649bba32ef25d5 LLR: 2.94 (-2.94,2.94) <-2.25,0.25> Total: 12056 W: 3281 L: 3092 D: 5683 Ptnml(0-2): 55, 1213, 3312, 1384, 64 Passed STC SMP: https://tests.stockfishchess.org/tests/view/6217f344da649bba32ef295e LLR: 2.94 (-2.94,2.94) <-2.25,0.25> Total: 27376 W: 7295 L: 7137 D: 12944 Ptnml(0-2): 52, 2859, 7715, 3003, 59 closes https://github.com/official-stockfish/Stockfish/pull/3944 No functional change bench: 6820724
This commit is contained in:
parent
27139dedac
commit
5f781d366e
6 changed files with 27 additions and 32 deletions
|
@ -109,7 +109,7 @@ namespace Stockfish::Eval::NNUE {
|
||||||
{
|
{
|
||||||
write_little_endian<std::uint32_t>(stream, Version);
|
write_little_endian<std::uint32_t>(stream, Version);
|
||||||
write_little_endian<std::uint32_t>(stream, hashValue);
|
write_little_endian<std::uint32_t>(stream, hashValue);
|
||||||
write_little_endian<std::uint32_t>(stream, desc.size());
|
write_little_endian<std::uint32_t>(stream, (std::uint32_t)desc.size());
|
||||||
stream.write(&desc[0], desc.size());
|
stream.write(&desc[0], desc.size());
|
||||||
return !stream.fail();
|
return !stream.fail();
|
||||||
}
|
}
|
||||||
|
@ -157,7 +157,7 @@ namespace Stockfish::Eval::NNUE {
|
||||||
|
|
||||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||||
|
|
||||||
const std::size_t bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||||
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
|
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
|
||||||
const auto positional = network[bucket]->propagate(transformedFeatures);
|
const auto positional = network[bucket]->propagate(transformedFeatures);
|
||||||
|
|
||||||
|
@ -197,7 +197,7 @@ namespace Stockfish::Eval::NNUE {
|
||||||
|
|
||||||
NnueEvalTrace t{};
|
NnueEvalTrace t{};
|
||||||
t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||||
for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) {
|
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) {
|
||||||
const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket);
|
const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket);
|
||||||
const auto positional = network[bucket]->propagate(transformedFeatures);
|
const auto positional = network[bucket]->propagate(transformedFeatures);
|
||||||
|
|
||||||
|
|
|
@ -235,10 +235,10 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||||
|
|
||||||
// Read network parameters
|
// Read network parameters
|
||||||
bool read_parameters(std::istream& stream) {
|
bool read_parameters(std::istream& stream) {
|
||||||
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
for (IndexType i = 0; i < OutputDimensions; ++i)
|
||||||
biases[i] = read_little_endian<BiasType>(stream);
|
biases[i] = read_little_endian<BiasType>(stream);
|
||||||
|
|
||||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||||
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
|
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
|
||||||
|
|
||||||
return !stream.fail();
|
return !stream.fail();
|
||||||
|
@ -246,10 +246,10 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||||
|
|
||||||
// Write network parameters
|
// Write network parameters
|
||||||
bool write_parameters(std::ostream& stream) const {
|
bool write_parameters(std::ostream& stream) const {
|
||||||
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
for (IndexType i = 0; i < OutputDimensions; ++i)
|
||||||
write_little_endian<BiasType>(stream, biases[i]);
|
write_little_endian<BiasType>(stream, biases[i]);
|
||||||
|
|
||||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||||
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
|
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
|
||||||
|
|
||||||
return !stream.fail();
|
return !stream.fail();
|
||||||
|
@ -422,9 +422,9 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||||
|
|
||||||
// Read network parameters
|
// Read network parameters
|
||||||
bool read_parameters(std::istream& stream) {
|
bool read_parameters(std::istream& stream) {
|
||||||
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
for (IndexType i = 0; i < OutputDimensions; ++i)
|
||||||
biases[i] = read_little_endian<BiasType>(stream);
|
biases[i] = read_little_endian<BiasType>(stream);
|
||||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||||
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
|
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
|
||||||
|
|
||||||
return !stream.fail();
|
return !stream.fail();
|
||||||
|
@ -432,10 +432,10 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||||
|
|
||||||
// Write network parameters
|
// Write network parameters
|
||||||
bool write_parameters(std::ostream& stream) const {
|
bool write_parameters(std::ostream& stream) const {
|
||||||
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
for (IndexType i = 0; i < OutputDimensions; ++i)
|
||||||
write_little_endian<BiasType>(stream, biases[i]);
|
write_little_endian<BiasType>(stream, biases[i]);
|
||||||
|
|
||||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||||
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
|
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
|
||||||
|
|
||||||
return !stream.fail();
|
return !stream.fail();
|
||||||
|
|
|
@ -171,14 +171,6 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||||
std::max(0, std::min(127, input[i] >> WeightScaleBits)));
|
std::max(0, std::min(127, input[i] >> WeightScaleBits)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Affine transform layers expect that there is at least
|
|
||||||
// ceil_to_multiple(OutputDimensions, 32) initialized values.
|
|
||||||
// We cannot do this in the affine transform because it requires
|
|
||||||
// preallocating space here.
|
|
||||||
for (IndexType i = OutputDimensions; i < PaddedOutputDimensions; ++i) {
|
|
||||||
output[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -46,7 +46,7 @@ struct Network
|
||||||
static constexpr int FC_1_OUTPUTS = 32;
|
static constexpr int FC_1_OUTPUTS = 32;
|
||||||
|
|
||||||
Layers::AffineTransform<TransformedFeatureDimensions, FC_0_OUTPUTS + 1> fc_0;
|
Layers::AffineTransform<TransformedFeatureDimensions, FC_0_OUTPUTS + 1> fc_0;
|
||||||
Layers::ClippedReLU<FC_0_OUTPUTS> ac_0;
|
Layers::ClippedReLU<FC_0_OUTPUTS + 1> ac_0;
|
||||||
Layers::AffineTransform<FC_0_OUTPUTS, FC_1_OUTPUTS> fc_1;
|
Layers::AffineTransform<FC_0_OUTPUTS, FC_1_OUTPUTS> fc_1;
|
||||||
Layers::ClippedReLU<FC_1_OUTPUTS> ac_1;
|
Layers::ClippedReLU<FC_1_OUTPUTS> ac_1;
|
||||||
Layers::AffineTransform<FC_1_OUTPUTS, 1> fc_2;
|
Layers::AffineTransform<FC_1_OUTPUTS, 1> fc_2;
|
||||||
|
@ -97,14 +97,19 @@ struct Network
|
||||||
alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out;
|
alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out;
|
||||||
alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out;
|
alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out;
|
||||||
alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out;
|
alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out;
|
||||||
|
|
||||||
|
Buffer()
|
||||||
|
{
|
||||||
|
std::memset(this, 0, sizeof(*this));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||||
char bufferRaw[sizeof(Buffer) + alignment];
|
static thread_local char bufferRaw[sizeof(Buffer) + alignment];
|
||||||
char* bufferRawAligned = align_ptr_up<alignment>(&bufferRaw[0]);
|
static thread_local char* bufferRawAligned = align_ptr_up<alignment>(&bufferRaw[0]);
|
||||||
Buffer& buffer = *(new (bufferRawAligned) Buffer);
|
static thread_local Buffer& buffer = *(new (bufferRawAligned) Buffer);
|
||||||
#else
|
#else
|
||||||
alignas(alignment) Buffer buffer;
|
alignas(alignment) static thread_local Buffer buffer;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
fc_0.propagate(transformedFeatures, buffer.fc_0_out);
|
fc_0.propagate(transformedFeatures, buffer.fc_0_out);
|
||||||
|
@ -118,10 +123,6 @@ struct Network
|
||||||
std::int32_t fwdOut = int(buffer.fc_0_out[FC_0_OUTPUTS]) * (600*OutputScale) / (127*(1<<WeightScaleBits));
|
std::int32_t fwdOut = int(buffer.fc_0_out[FC_0_OUTPUTS]) * (600*OutputScale) / (127*(1<<WeightScaleBits));
|
||||||
std::int32_t outputValue = buffer.fc_2_out[0] + fwdOut;
|
std::int32_t outputValue = buffer.fc_2_out[0] + fwdOut;
|
||||||
|
|
||||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
|
||||||
buffer.~Buffer();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return outputValue;
|
return outputValue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -127,11 +127,11 @@ namespace Stockfish::Eval::NNUE {
|
||||||
{
|
{
|
||||||
for (; i + 1 < sizeof(IntType); ++i)
|
for (; i + 1 < sizeof(IntType); ++i)
|
||||||
{
|
{
|
||||||
u[i] = v;
|
u[i] = (std::uint8_t)v;
|
||||||
v >>= 8;
|
v >>= 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
u[i] = v;
|
u[i] = (std::uint8_t)v;
|
||||||
|
|
||||||
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||||
}
|
}
|
||||||
|
|
|
@ -123,8 +123,10 @@ namespace Stockfish::Eval::NNUE {
|
||||||
// We use __m* types as template arguments, which causes GCC to emit warnings
|
// We use __m* types as template arguments, which causes GCC to emit warnings
|
||||||
// about losing some attribute information. This is irrelevant to us as we
|
// about losing some attribute information. This is irrelevant to us as we
|
||||||
// only take their size, so the following pragma are harmless.
|
// only take their size, so the following pragma are harmless.
|
||||||
|
#if defined(__GNUC__)
|
||||||
#pragma GCC diagnostic push
|
#pragma GCC diagnostic push
|
||||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||||
|
#endif
|
||||||
|
|
||||||
template <typename SIMDRegisterType,
|
template <typename SIMDRegisterType,
|
||||||
typename LaneType,
|
typename LaneType,
|
||||||
|
@ -156,9 +158,9 @@ namespace Stockfish::Eval::NNUE {
|
||||||
|
|
||||||
static constexpr int NumRegs = BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
|
static constexpr int NumRegs = BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
|
||||||
static constexpr int NumPsqtRegs = BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
|
static constexpr int NumPsqtRegs = BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
|
||||||
|
#if defined(__GNUC__)
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue