mirror of
https://github.com/sockspls/badfish
synced 2025-05-01 01:03:09 +00:00
Optimize AVX2 path in NNUE evaluation
This is achieved by always selecting the AffineTransform specialization for small inputs.

A related patch was initially tested as a simplification:
STC https://tests.stockfishchess.org/tests/view/6317c3f437f41b13973d6dff
LLR: 2.95 (-2.94,2.94) <-1.75,0.25>
Total: 58072 W: 15619 L: 15425 D: 27028
Ptnml(0-2): 241, 6191, 15992, 6357, 255

Elo gain speedup test:
STC https://tests.stockfishchess.org/tests/view/63181c1b37f41b13973d79dc
LLR: 2.94 (-2.94,2.94) <0.00,2.00>
Total: 184496 W: 49922 L: 49401 D: 85173
Ptnml(0-2): 851, 19397, 51208, 19964, 828

This patch gained in testing:
speedup = +0.0071
P(speedup > 0) = 1.0000
on CPU: 16 x AMD Ryzen 9 3950X

closes https://github.com/official-stockfish/Stockfish/pull/4158

No functional change
This commit is contained in:
parent
1591e5ac3b
commit
82bb21dc7a
2 changed files with 11 additions and 5 deletions
|
@ -25,7 +25,7 @@
|
|||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
#include "../nnue_common.h"
|
||||
#include "../../simd.h"
|
||||
#include "simd.h"
|
||||
|
||||
/*
|
||||
This file contains the definition for a fully connected layer (aka affine transform).
|
||||
|
@ -151,9 +151,15 @@ namespace Stockfish::Eval::NNUE::Layers {
|
|||
template <IndexType InDims, IndexType OutDims, typename Enabled = void>
|
||||
class AffineTransform;
|
||||
|
||||
// Threshold on the padded input dimension used to choose between the two
// AffineTransform specializations: the "large inputs" one is enabled when the
// padded input size is >= LargeInputSize, the other one when it is smaller.
// With USE_AVX512 the threshold is 2 * 64. Otherwise it is the largest value
// IndexType can hold, so the "large inputs" specialization is effectively
// never selected.
#if defined (USE_AVX512)
|
||||
constexpr IndexType LargeInputSize = 2 * 64;
|
||||
#else
|
||||
constexpr IndexType LargeInputSize = std::numeric_limits<IndexType>::max();
|
||||
#endif
|
||||
|
||||
// A specialization for large inputs.
|
||||
template <IndexType InDims, IndexType OutDims>
|
||||
class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) >= 2*64)>> {
|
||||
class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) >= LargeInputSize)>> {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = std::uint8_t;
|
||||
|
@ -170,7 +176,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
|||
|
||||
using OutputBuffer = OutputType[PaddedOutputDimensions];
|
||||
|
||||
static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen.");
|
||||
static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization should not have been chosen.");
|
||||
|
||||
#if defined (USE_AVX512)
|
||||
static constexpr const IndexType InputSimdWidth = 64;
|
||||
|
@ -369,7 +375,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
|||
};
|
||||
|
||||
template <IndexType InDims, IndexType OutDims>
|
||||
class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) < 2*64)>> {
|
||||
class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) < LargeInputSize)>> {
|
||||
public:
|
||||
// Input/output type
|
||||
// Input/output type
|
||||
|
@ -387,7 +393,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
|||
|
||||
using OutputBuffer = OutputType[PaddedOutputDimensions];
|
||||
|
||||
static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen.");
|
||||
static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization should not have been chosen.");
|
||||
|
||||
#if defined (USE_SSSE3)
|
||||
static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
|
||||
|
|
Loading…
Add table
Reference in a new issue