Fix AVX512 build with older compilers

Avoids an intrinsic that is missing in gcc < 10. For this target, the build might trigger another gcc bug on Windows, which requires an up-to-date gcc 8, 9, or 10, or the use of clang.

Fixes https://github.com/official-stockfish/Stockfish/issues/2975

Closes https://github.com/official-stockfish/Stockfish/pull/2976

No functional change.
2025-04-30 00:33:09 +00:00 · 2020-08-10 12:52:46 -07:00 · 2020-08-10 12:52:46 -07:00 · f46c73040c
commit f46c73040c
parent 399cddf444
2 changed files with 2 additions and 3 deletions
--- a/src/Makefile
+++ b/src/Makefile
@@ -416,7 +416,7 @@ endif
 ifeq ($(avx512),yes)
 	CXXFLAGS += -DUSE_AVX512
 	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
-		CXXFLAGS += -mavx512bw
+		CXXFLAGS += -mavx512f -mavx512bw
 	endif
 endif

--- a/src/nnue/layers/affine_transform.h
+++ b/src/nnue/layers/affine_transform.h
@@ -126,8 +126,7 @@ namespace Eval::NNUE::Layers {
            const auto iv256  = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
            const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
            __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
-            product256 = _mm256_madd_epi16(product256, _mm256_set1_epi16(1));
-            sum = _mm512_add_epi32(sum, _mm512_zextsi256_si512(product256));
+            sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256));
        }
        output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];