mirror of
https://github.com/sockspls/badfish
synced 2025-04-30 08:43:09 +00:00
Read NNUE net faster
Load feature transformer weights in bulk on little-endian machines. This is particularly useful for testing new nets with c-chess-cli; see https://github.com/lucasart/c-chess-cli/issues/44

```
$ time ./stockfish.exe uci
Before : 0m0.914s
After  : 0m0.483s
```

No functional change.
This commit is contained in:
parent
559942d64d
commit
b84fa04db6
4 changed files with 81 additions and 39 deletions
13
src/misc.h
13
src/misc.h
|
@ -66,9 +66,10 @@ std::ostream& operator<<(std::ostream&, SyncCout);
|
|||
#define sync_cout std::cout << IO_LOCK
|
||||
#define sync_endl std::endl << IO_UNLOCK
|
||||
|
||||
// `ptr` must point to an array of size at least
|
||||
// `sizeof(T) * N + alignment` bytes, where `N` is the
|
||||
// number of elements in the array.
|
||||
|
||||
// align_ptr_up() : get the first aligned element of an array.
|
||||
// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
|
||||
// where N is the number of elements in the array.
|
||||
template <uintptr_t Alignment, typename T>
|
||||
T* align_ptr_up(T* ptr)
|
||||
{
|
||||
|
@ -78,6 +79,12 @@ T* align_ptr_up(T* ptr)
|
|||
return reinterpret_cast<T*>(reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
|
||||
}
|
||||
|
||||
|
||||
// IsLittleEndian : true if and only if the target machine is little-endian, i.e. the least significant byte of 0x01020304 is stored first
|
||||
static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
|
||||
static inline const bool IsLittleEndian = (Le.c[0] == 4);
|
||||
|
||||
|
||||
template <typename T>
|
||||
class ValueListInserter {
|
||||
public:
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
#include <cstring>
|
||||
#include <iostream>
|
||||
|
||||
#include "../misc.h" // for IsLittleEndian
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
#include <immintrin.h>
|
||||
|
||||
|
@ -86,37 +88,77 @@ namespace Stockfish::Eval::NNUE {
|
|||
// necessary to return a result with the byte ordering of the compiling machine.
|
||||
template <typename IntType>
|
||||
inline IntType read_little_endian(std::istream& stream) {
|
||||
|
||||
IntType result;
|
||||
std::uint8_t u[sizeof(IntType)];
|
||||
typename std::make_unsigned<IntType>::type v = 0;
|
||||
|
||||
stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||
for (std::size_t i = 0; i < sizeof(IntType); ++i)
|
||||
v = (v << 8) | u[sizeof(IntType) - i - 1];
|
||||
if (IsLittleEndian)
|
||||
stream.read(reinterpret_cast<char*>(&result), sizeof(IntType));
|
||||
else
|
||||
{
|
||||
std::uint8_t u[sizeof(IntType)];
|
||||
typename std::make_unsigned<IntType>::type v = 0;
|
||||
|
||||
stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||
for (std::size_t i = 0; i < sizeof(IntType); ++i)
|
||||
v = (v << 8) | u[sizeof(IntType) - i - 1];
|
||||
|
||||
std::memcpy(&result, &v, sizeof(IntType));
|
||||
}
|
||||
|
||||
std::memcpy(&result, &v, sizeof(IntType));
|
||||
return result;
|
||||
}
|
||||
|
||||
// write_little_endian() is our utility to write an integer (signed or unsigned, any size)
|
||||
// to a stream in little-endian order. We swap the byte order before the write if
|
||||
// necessary to always write in little-endian order, independently of the byte
|
||||
// ordering of the compiling machine.
|
||||
template <typename IntType>
|
||||
inline void write_little_endian(std::ostream& stream, IntType value) {
|
||||
|
||||
std::uint8_t u[sizeof(IntType)];
|
||||
typename std::make_unsigned<IntType>::type v = value;
|
||||
if (IsLittleEndian)
|
||||
stream.write(reinterpret_cast<const char*>(&value), sizeof(IntType));
|
||||
else
|
||||
{
|
||||
std::uint8_t u[sizeof(IntType)];
|
||||
typename std::make_unsigned<IntType>::type v = value;
|
||||
|
||||
std::size_t i = 0;
|
||||
// if constexpr to silence the warning about shift by 8
|
||||
if constexpr (sizeof(IntType) > 1) {
|
||||
for (; i + 1 < sizeof(IntType); ++i) {
|
||||
u[i] = v;
|
||||
v >>= 8;
|
||||
}
|
||||
std::size_t i = 0;
|
||||
// if constexpr to silence the warning about shift by 8
|
||||
if constexpr (sizeof(IntType) > 1)
|
||||
{
|
||||
for (; i + 1 < sizeof(IntType); ++i)
|
||||
{
|
||||
u[i] = v;
|
||||
v >>= 8;
|
||||
}
|
||||
}
|
||||
u[i] = v;
|
||||
|
||||
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||
}
|
||||
u[i] = v;
|
||||
|
||||
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||
}
|
||||
|
||||
// read_little_endian(s, out, N) : read integers in bulk from a little-endian stream.
|
||||
// This reads N integers from stream s and puts them in array out.
|
||||
template <typename IntType>
|
||||
inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) {
|
||||
if (IsLittleEndian)
|
||||
stream.read(reinterpret_cast<char*>(out), sizeof(IntType) * count);
|
||||
else
|
||||
for (std::size_t i = 0; i < count; ++i)
|
||||
out[i] = read_little_endian<IntType>(stream);
|
||||
}
|
||||
|
||||
// write_little_endian(s, values, N) : write integers in bulk to a little-endian stream.
|
||||
// This takes N integers from array values and writes them to stream s.
|
||||
template <typename IntType>
|
||||
inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) {
|
||||
if (IsLittleEndian)
|
||||
stream.write(reinterpret_cast<const char*>(values), sizeof(IntType) * count);
|
||||
else
|
||||
for (std::size_t i = 0; i < count; ++i)
|
||||
write_little_endian<IntType>(stream, values[i]);
|
||||
}
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_COMMON_H_INCLUDED
|
||||
|
|
|
@ -24,8 +24,6 @@
|
|||
#include "nnue_common.h"
|
||||
#include "nnue_architecture.h"
|
||||
|
||||
#include "../misc.h"
|
||||
|
||||
#include <cstring> // std::memset()
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
@ -150,23 +148,21 @@ namespace Stockfish::Eval::NNUE {
|
|||
|
||||
// Read network parameters
|
||||
bool read_parameters(std::istream& stream) {
|
||||
for (std::size_t i = 0; i < HalfDimensions; ++i)
|
||||
biases[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < HalfDimensions * InputDimensions; ++i)
|
||||
weights[i] = read_little_endian<WeightType>(stream);
|
||||
for (std::size_t i = 0; i < PSQTBuckets * InputDimensions; ++i)
|
||||
psqtWeights[i] = read_little_endian<PSQTWeightType>(stream);
|
||||
|
||||
read_little_endian<BiasType >(stream, biases , HalfDimensions );
|
||||
read_little_endian<WeightType >(stream, weights , HalfDimensions * InputDimensions);
|
||||
read_little_endian<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
for (std::size_t i = 0; i < HalfDimensions; ++i)
|
||||
write_little_endian<BiasType>(stream, biases[i]);
|
||||
for (std::size_t i = 0; i < HalfDimensions * InputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, weights[i]);
|
||||
for (std::size_t i = 0; i < PSQTBuckets * InputDimensions; ++i)
|
||||
write_little_endian<PSQTWeightType>(stream, psqtWeights[i]);
|
||||
|
||||
write_little_endian<BiasType >(stream, biases , HalfDimensions );
|
||||
write_little_endian<WeightType >(stream, weights , HalfDimensions * InputDimensions);
|
||||
write_little_endian<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
|
|
|
@ -105,9 +105,6 @@ template<> inline void swap_endian<uint8_t>(uint8_t&) {}
|
|||
|
||||
template<typename T, int LE> T number(void* addr)
|
||||
{
|
||||
static const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
|
||||
static const bool IsLittleEndian = (Le.c[0] == 4);
|
||||
|
||||
T v;
|
||||
|
||||
if ((uintptr_t)addr & (alignof(T) - 1)) // Unaligned pointer (very rare)
|
||||
|
|
Loading…
Add table
Reference in a new issue