Fix for MSVC compilation.

MSVC needs two more explicit casts to compile new affine_transform_sparse_input. See https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps and https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps closes https://github.com/official-stockfish/Stockfish/pull/4616 No functional change
2025-04-30 08:43:09 +00:00 · 2023-06-13 06:24:04 +02:00 · 2023-06-13 06:24:04 +02:00 · 7922e07af8
commit 7922e07af8
parent 92c949e12e
2 changed files with 3 additions and 2 deletions
--- a/1
+++ b/1
@ -19,6 +19,7 @@ Alexander Kure
 Alexander Pagel (Lolligerhans)
 Alfredo Menezes (lonfom169)
 Ali AlZhrani (Cooffe)
+Andreas Matthies (Matthies)
 Andrei Vetrov (proukornew)
 Andrew Grant (AndyGrant)
 Andrey Neporada (nepal)
--- a/src/nnue/layers/affine_transform_sparse_input.h
+++ b/src/nnue/layers/affine_transform_sparse_input.h
@ -98,10 +98,10 @@ namespace Stockfish::Eval::NNUE::Layers {
    #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
 #elif defined (USE_AVX2)
    using vec_t = __m256i;
-    #define vec_nnz(a) _mm256_movemask_ps((__m256)_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))
+    #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
 #elif defined (USE_SSSE3)
    using vec_t = __m128i;
-    #define vec_nnz(a) _mm_movemask_ps((__m128)_mm_cmpgt_epi32(a, _mm_setzero_si128()))
+    #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
 #endif
    constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t);
    // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8)