1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-05-02 09:39:36 +00:00

Fix for MSVC compilation.

MSVC needs two more explicit casts to compile new affine_transform_sparse_input.
See https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps
and https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps

closes https://github.com/official-stockfish/Stockfish/pull/4616

No functional change
This commit is contained in:
Andreas Matthies 2023-06-13 06:24:04 +02:00 committed by Joost VandeVondele
parent 92c949e12e
commit 7922e07af8
2 changed files with 3 additions and 2 deletions

View file

@ -19,6 +19,7 @@ Alexander Kure
Alexander Pagel (Lolligerhans) Alexander Pagel (Lolligerhans)
Alfredo Menezes (lonfom169) Alfredo Menezes (lonfom169)
Ali AlZhrani (Cooffe) Ali AlZhrani (Cooffe)
Andreas Matthies (Matthies)
Andrei Vetrov (proukornew) Andrei Vetrov (proukornew)
Andrew Grant (AndyGrant) Andrew Grant (AndyGrant)
Andrey Neporada (nepal) Andrey Neporada (nepal)

View file

@ -98,10 +98,10 @@ namespace Stockfish::Eval::NNUE::Layers {
#define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
#elif defined (USE_AVX2) #elif defined (USE_AVX2)
using vec_t = __m256i; using vec_t = __m256i;
#define vec_nnz(a) _mm256_movemask_ps((__m256)_mm256_cmpgt_epi32(a, _mm256_setzero_si256())) #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
#elif defined (USE_SSSE3) #elif defined (USE_SSSE3)
using vec_t = __m128i; using vec_t = __m128i;
#define vec_nnz(a) _mm_movemask_ps((__m128)_mm_cmpgt_epi32(a, _mm_setzero_si128())) #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
#endif #endif
constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t); constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t);
// Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8) // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8)