More simplifications and cleanup in affine_transform_sparse_input.h

closes https://github.com/official-stockfish/Stockfish/pull/4677

No functional change
2025-05-01 01:03:09 +00:00 · 2023-07-11 22:19:48 -07:00 · 2023-07-11 22:19:48 -07:00 · 529d3be8e2
commit 529d3be8e2
parent f972947492
1 changed files with 10 additions and 34 deletions
--- a/src/nnue/layers/affine_transform_sparse_input.h
+++ b/src/nnue/layers/affine_transform_sparse_input.h
@@ -34,43 +34,15 @@
 */
 namespace Stockfish::Eval::NNUE::Layers {
-#if defined(__GNUC__)  // GCC, Clang, ICC
-  static inline IndexType lsb_(std::uint32_t b) {
-    assert(b);
-    return IndexType(__builtin_ctzl(b));
-  }
-#elif defined(_MSC_VER)  // MSVC
-  static inline IndexType lsb_(std::uint32_t b) {
-    assert(b);
-    unsigned long idx;
-    _BitScanForward(&idx, b);
-    return (IndexType) idx;
-  }
-#else  // Compiler is neither GCC nor MSVC compatible
-#error "Compiler not supported."
-#endif
 #if defined(USE_SSSE3)
  alignas(CacheLineSize) static inline const std::array<std::array<std::uint16_t, 8>, 256> lookup_indices = [](){
    std::array<std::array<std::uint16_t, 8>, 256> v{};
-    for (int i = 0; i < 256; ++i)
+    for (unsigned i = 0; i < 256; ++i)
    {
-      int j = i;
+      std::uint64_t j = i, k = 0;
-      int k = 0;
      while(j)
-      {
+        v[i][k++] = pop_lsb(j);
-        const IndexType lsbIndex = lsb_(std::uint32_t(j));
-        j &= j - 1;
-        v[i][k] = lsbIndex;
-        ++k;
-      }
    }
    return v;
  }();
@@ -83,7 +55,11 @@ namespace Stockfish::Eval::NNUE::Layers {
    #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
 #elif defined (USE_AVX2)
    using vec_t = __m256i;
-    #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
+    #if defined(USE_VNNI) && !defined(USE_AVXVNNI)
+        #define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256())
+    #else
+        #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
+    #endif
 #elif defined (USE_SSSE3)
    using vec_t = __m128i;
    #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
@@ -97,8 +73,8 @@ namespace Stockfish::Eval::NNUE::Layers {
    const auto inputVector = reinterpret_cast<const vec_t*>(input);
    IndexType count = 0;
-    __m128i base = _mm_set1_epi16(0);
+    __m128i base = _mm_setzero_si128();
-    __m128i increment = _mm_set1_epi16(8);
+    const __m128i increment = _mm_set1_epi16(8);
    for (IndexType i = 0; i < NumChunks; ++i)
    {
      // bitmask of nonzero values in this chunk