mirror of
https://github.com/sockspls/badfish
synced 2025-05-01 09:13:08 +00:00
Read NNUE net faster
Load feature transformer weights in bulk on little-endian machines. This is in particular useful to test new nets with c-chess-cli, see https://github.com/lucasart/c-chess-cli/issues/44 ``` $ time ./stockfish.exe uci Before : 0m0.914s After : 0m0.483s ``` No functional change
This commit is contained in:
parent
559942d64d
commit
b84fa04db6
4 changed files with 81 additions and 39 deletions
13
src/misc.h
13
src/misc.h
|
@ -66,9 +66,10 @@ std::ostream& operator<<(std::ostream&, SyncCout);
|
||||||
#define sync_cout std::cout << IO_LOCK
|
#define sync_cout std::cout << IO_LOCK
|
||||||
#define sync_endl std::endl << IO_UNLOCK
|
#define sync_endl std::endl << IO_UNLOCK
|
||||||
|
|
||||||
// `ptr` must point to an array of size at least
|
|
||||||
// `sizeof(T) * N + alignment` bytes, where `N` is the
|
// align_ptr_up() : get the first aligned element of an array.
|
||||||
// number of elements in the array.
|
// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
|
||||||
|
// where N is the number of elements in the array.
|
||||||
template <uintptr_t Alignment, typename T>
|
template <uintptr_t Alignment, typename T>
|
||||||
T* align_ptr_up(T* ptr)
|
T* align_ptr_up(T* ptr)
|
||||||
{
|
{
|
||||||
|
@ -78,6 +79,12 @@ T* align_ptr_up(T* ptr)
|
||||||
return reinterpret_cast<T*>(reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
|
return reinterpret_cast<T*>(reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// IsLittleEndian : true if and only if the binary is compiled on a little endian machine
|
||||||
|
static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
|
||||||
|
static inline const bool IsLittleEndian = (Le.c[0] == 4);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class ValueListInserter {
|
class ValueListInserter {
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -24,6 +24,8 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "../misc.h" // for IsLittleEndian
|
||||||
|
|
||||||
#if defined(USE_AVX2)
|
#if defined(USE_AVX2)
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
@ -86,8 +88,12 @@ namespace Stockfish::Eval::NNUE {
|
||||||
// necessary to return a result with the byte ordering of the compiling machine.
|
// necessary to return a result with the byte ordering of the compiling machine.
|
||||||
template <typename IntType>
|
template <typename IntType>
|
||||||
inline IntType read_little_endian(std::istream& stream) {
|
inline IntType read_little_endian(std::istream& stream) {
|
||||||
|
|
||||||
IntType result;
|
IntType result;
|
||||||
|
|
||||||
|
if (IsLittleEndian)
|
||||||
|
stream.read(reinterpret_cast<char*>(&result), sizeof(IntType));
|
||||||
|
else
|
||||||
|
{
|
||||||
std::uint8_t u[sizeof(IntType)];
|
std::uint8_t u[sizeof(IntType)];
|
||||||
typename std::make_unsigned<IntType>::type v = 0;
|
typename std::make_unsigned<IntType>::type v = 0;
|
||||||
|
|
||||||
|
@ -96,19 +102,31 @@ namespace Stockfish::Eval::NNUE {
|
||||||
v = (v << 8) | u[sizeof(IntType) - i - 1];
|
v = (v << 8) | u[sizeof(IntType) - i - 1];
|
||||||
|
|
||||||
std::memcpy(&result, &v, sizeof(IntType));
|
std::memcpy(&result, &v, sizeof(IntType));
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// write_little_endian() is our utility to write an integer (signed or unsigned, any size)
|
||||||
|
// to a stream in little-endian order. We swap the byte order before the write if
|
||||||
|
// necessary to always write in little endian order, independently of the byte
|
||||||
|
// ordering of the compiling machine.
|
||||||
template <typename IntType>
|
template <typename IntType>
|
||||||
inline void write_little_endian(std::ostream& stream, IntType value) {
|
inline void write_little_endian(std::ostream& stream, IntType value) {
|
||||||
|
|
||||||
|
if (IsLittleEndian)
|
||||||
|
stream.write(reinterpret_cast<const char*>(&value), sizeof(IntType));
|
||||||
|
else
|
||||||
|
{
|
||||||
std::uint8_t u[sizeof(IntType)];
|
std::uint8_t u[sizeof(IntType)];
|
||||||
typename std::make_unsigned<IntType>::type v = value;
|
typename std::make_unsigned<IntType>::type v = value;
|
||||||
|
|
||||||
std::size_t i = 0;
|
std::size_t i = 0;
|
||||||
// if constexpr to silence the warning about shift by 8
|
// if constexpr to silence the warning about shift by 8
|
||||||
if constexpr (sizeof(IntType) > 1) {
|
if constexpr (sizeof(IntType) > 1)
|
||||||
for (; i + 1 < sizeof(IntType); ++i) {
|
{
|
||||||
|
for (; i + 1 < sizeof(IntType); ++i)
|
||||||
|
{
|
||||||
u[i] = v;
|
u[i] = v;
|
||||||
v >>= 8;
|
v >>= 8;
|
||||||
}
|
}
|
||||||
|
@ -117,6 +135,30 @@ namespace Stockfish::Eval::NNUE {
|
||||||
|
|
||||||
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// read_little_endian(s, out, N) : read integers in bulk from a little-endian stream.
|
||||||
|
// This reads N integers from stream s and puts them in array out.
|
||||||
|
template <typename IntType>
|
||||||
|
inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) {
|
||||||
|
if (IsLittleEndian)
|
||||||
|
stream.read(reinterpret_cast<char*>(out), sizeof(IntType) * count);
|
||||||
|
else
|
||||||
|
for (std::size_t i = 0; i < count; ++i)
|
||||||
|
out[i] = read_little_endian<IntType>(stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
// write_little_endian(s, values, N) : write integers in bulk to a little-endian stream.
|
||||||
|
// This takes N integers from array values and writes them on stream s.
|
||||||
|
template <typename IntType>
|
||||||
|
inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) {
|
||||||
|
if (IsLittleEndian)
|
||||||
|
stream.write(reinterpret_cast<const char*>(values), sizeof(IntType) * count);
|
||||||
|
else
|
||||||
|
for (std::size_t i = 0; i < count; ++i)
|
||||||
|
write_little_endian<IntType>(stream, values[i]);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Stockfish::Eval::NNUE
|
} // namespace Stockfish::Eval::NNUE
|
||||||
|
|
||||||
#endif // #ifndef NNUE_COMMON_H_INCLUDED
|
#endif // #ifndef NNUE_COMMON_H_INCLUDED
|
||||||
|
|
|
@ -24,8 +24,6 @@
|
||||||
#include "nnue_common.h"
|
#include "nnue_common.h"
|
||||||
#include "nnue_architecture.h"
|
#include "nnue_architecture.h"
|
||||||
|
|
||||||
#include "../misc.h"
|
|
||||||
|
|
||||||
#include <cstring> // std::memset()
|
#include <cstring> // std::memset()
|
||||||
|
|
||||||
namespace Stockfish::Eval::NNUE {
|
namespace Stockfish::Eval::NNUE {
|
||||||
|
@ -150,23 +148,21 @@ namespace Stockfish::Eval::NNUE {
|
||||||
|
|
||||||
// Read network parameters
|
// Read network parameters
|
||||||
bool read_parameters(std::istream& stream) {
|
bool read_parameters(std::istream& stream) {
|
||||||
for (std::size_t i = 0; i < HalfDimensions; ++i)
|
|
||||||
biases[i] = read_little_endian<BiasType>(stream);
|
read_little_endian<BiasType >(stream, biases , HalfDimensions );
|
||||||
for (std::size_t i = 0; i < HalfDimensions * InputDimensions; ++i)
|
read_little_endian<WeightType >(stream, weights , HalfDimensions * InputDimensions);
|
||||||
weights[i] = read_little_endian<WeightType>(stream);
|
read_little_endian<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
|
||||||
for (std::size_t i = 0; i < PSQTBuckets * InputDimensions; ++i)
|
|
||||||
psqtWeights[i] = read_little_endian<PSQTWeightType>(stream);
|
|
||||||
return !stream.fail();
|
return !stream.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write network parameters
|
// Write network parameters
|
||||||
bool write_parameters(std::ostream& stream) const {
|
bool write_parameters(std::ostream& stream) const {
|
||||||
for (std::size_t i = 0; i < HalfDimensions; ++i)
|
|
||||||
write_little_endian<BiasType>(stream, biases[i]);
|
write_little_endian<BiasType >(stream, biases , HalfDimensions );
|
||||||
for (std::size_t i = 0; i < HalfDimensions * InputDimensions; ++i)
|
write_little_endian<WeightType >(stream, weights , HalfDimensions * InputDimensions);
|
||||||
write_little_endian<WeightType>(stream, weights[i]);
|
write_little_endian<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
|
||||||
for (std::size_t i = 0; i < PSQTBuckets * InputDimensions; ++i)
|
|
||||||
write_little_endian<PSQTWeightType>(stream, psqtWeights[i]);
|
|
||||||
return !stream.fail();
|
return !stream.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -105,9 +105,6 @@ template<> inline void swap_endian<uint8_t>(uint8_t&) {}
|
||||||
|
|
||||||
template<typename T, int LE> T number(void* addr)
|
template<typename T, int LE> T number(void* addr)
|
||||||
{
|
{
|
||||||
static const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
|
|
||||||
static const bool IsLittleEndian = (Le.c[0] == 4);
|
|
||||||
|
|
||||||
T v;
|
T v;
|
||||||
|
|
||||||
if ((uintptr_t)addr & (alignof(T) - 1)) // Unaligned pointer (very rare)
|
if ((uintptr_t)addr & (alignof(T) - 1)) // Unaligned pointer (very rare)
|
||||||
|
|
Loading…
Add table
Reference in a new issue