diff --git a/src/Makefile b/src/Makefile index fd2618a4..e34fbf61 100644 --- a/src/Makefile +++ b/src/Makefile @@ -416,7 +416,7 @@ endif ifeq ($(avx512),yes) CXXFLAGS += -DUSE_AVX512 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -mavx512bw + CXXFLAGS += -mavx512f -mavx512bw endif endif diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 985ee71a..8d2acd18 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -126,8 +126,7 @@ namespace Eval::NNUE::Layers { const auto iv256 = reinterpret_cast(&input_vector[kNumChunks]); const auto row256 = reinterpret_cast(&row[kNumChunks]); __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0])); - product256 = _mm256_madd_epi16(product256, _mm256_set1_epi16(1)); - sum = _mm512_add_epi32(sum, _mm512_zextsi256_si512(product256)); + sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256)); } output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];