1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-05-01 01:03:09 +00:00

More simplifications and cleanup in affine_transform_sparse_input.h

closes https://github.com/official-stockfish/Stockfish/pull/4677

No functional change
This commit is contained in:
mstembera 2023-07-11 22:19:48 -07:00 committed by Joost VandeVondele
parent f972947492
commit 529d3be8e2

View file

@ -34,43 +34,15 @@
*/ */
namespace Stockfish::Eval::NNUE::Layers { namespace Stockfish::Eval::NNUE::Layers {
#if defined(__GNUC__) // GCC, Clang, ICC
static inline IndexType lsb_(std::uint32_t b) {
assert(b);
return IndexType(__builtin_ctzl(b));
}
#elif defined(_MSC_VER) // MSVC
static inline IndexType lsb_(std::uint32_t b) {
assert(b);
unsigned long idx;
_BitScanForward(&idx, b);
return (IndexType) idx;
}
#else // Compiler is neither GCC nor MSVC compatible
#error "Compiler not supported."
#endif
#if defined(USE_SSSE3) #if defined(USE_SSSE3)
alignas(CacheLineSize) static inline const std::array<std::array<std::uint16_t, 8>, 256> lookup_indices = [](){ alignas(CacheLineSize) static inline const std::array<std::array<std::uint16_t, 8>, 256> lookup_indices = [](){
std::array<std::array<std::uint16_t, 8>, 256> v{}; std::array<std::array<std::uint16_t, 8>, 256> v{};
for (int i = 0; i < 256; ++i) for (unsigned i = 0; i < 256; ++i)
{ {
int j = i; std::uint64_t j = i, k = 0;
int k = 0;
while(j) while(j)
{ v[i][k++] = pop_lsb(j);
const IndexType lsbIndex = lsb_(std::uint32_t(j));
j &= j - 1;
v[i][k] = lsbIndex;
++k;
}
} }
return v; return v;
}(); }();
@ -83,7 +55,11 @@ namespace Stockfish::Eval::NNUE::Layers {
#define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
#elif defined (USE_AVX2) #elif defined (USE_AVX2)
using vec_t = __m256i; using vec_t = __m256i;
#define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) #if defined(USE_VNNI) && !defined(USE_AVXVNNI)
#define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256())
#else
#define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
#endif
#elif defined (USE_SSSE3) #elif defined (USE_SSSE3)
using vec_t = __m128i; using vec_t = __m128i;
#define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
@ -97,8 +73,8 @@ namespace Stockfish::Eval::NNUE::Layers {
const auto inputVector = reinterpret_cast<const vec_t*>(input); const auto inputVector = reinterpret_cast<const vec_t*>(input);
IndexType count = 0; IndexType count = 0;
__m128i base = _mm_set1_epi16(0); __m128i base = _mm_setzero_si128();
__m128i increment = _mm_set1_epi16(8); const __m128i increment = _mm_set1_epi16(8);
for (IndexType i = 0; i < NumChunks; ++i) for (IndexType i = 0; i < NumChunks; ++i)
{ {
// bitmask of nonzero values in this chunk // bitmask of nonzero values in this chunk