mirror of
https://github.com/sockspls/badfish
synced 2025-05-01 01:03:09 +00:00
More simplifications and cleanup in affine_transform_sparse_input.h
closes https://github.com/official-stockfish/Stockfish/pull/4677
No functional change
This commit is contained in:
parent
f972947492
commit
529d3be8e2
1 changed file with 10 additions and 34 deletions
|
@ -34,43 +34,15 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
namespace Stockfish::Eval::NNUE::Layers {
|
namespace Stockfish::Eval::NNUE::Layers {
|
||||||
#if defined(__GNUC__) // GCC, Clang, ICC
|
|
||||||
|
|
||||||
static inline IndexType lsb_(std::uint32_t b) {
|
|
||||||
assert(b);
|
|
||||||
return IndexType(__builtin_ctzl(b));
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif defined(_MSC_VER) // MSVC
|
|
||||||
|
|
||||||
static inline IndexType lsb_(std::uint32_t b) {
|
|
||||||
assert(b);
|
|
||||||
unsigned long idx;
|
|
||||||
_BitScanForward(&idx, b);
|
|
||||||
return (IndexType) idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else // Compiler is neither GCC nor MSVC compatible
|
|
||||||
|
|
||||||
#error "Compiler not supported."
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(USE_SSSE3)
|
#if defined(USE_SSSE3)
|
||||||
alignas(CacheLineSize) static inline const std::array<std::array<std::uint16_t, 8>, 256> lookup_indices = [](){
|
alignas(CacheLineSize) static inline const std::array<std::array<std::uint16_t, 8>, 256> lookup_indices = [](){
|
||||||
std::array<std::array<std::uint16_t, 8>, 256> v{};
|
std::array<std::array<std::uint16_t, 8>, 256> v{};
|
||||||
for (int i = 0; i < 256; ++i)
|
for (unsigned i = 0; i < 256; ++i)
|
||||||
{
|
{
|
||||||
int j = i;
|
std::uint64_t j = i, k = 0;
|
||||||
int k = 0;
|
|
||||||
while(j)
|
while(j)
|
||||||
{
|
v[i][k++] = pop_lsb(j);
|
||||||
const IndexType lsbIndex = lsb_(std::uint32_t(j));
|
|
||||||
j &= j - 1;
|
|
||||||
v[i][k] = lsbIndex;
|
|
||||||
++k;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return v;
|
return v;
|
||||||
}();
|
}();
|
||||||
|
@ -83,7 +55,11 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||||
#define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
|
#define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
|
||||||
#elif defined (USE_AVX2)
|
#elif defined (USE_AVX2)
|
||||||
using vec_t = __m256i;
|
using vec_t = __m256i;
|
||||||
#define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
|
#if defined(USE_VNNI) && !defined(USE_AVXVNNI)
|
||||||
|
#define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256())
|
||||||
|
#else
|
||||||
|
#define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
|
||||||
|
#endif
|
||||||
#elif defined (USE_SSSE3)
|
#elif defined (USE_SSSE3)
|
||||||
using vec_t = __m128i;
|
using vec_t = __m128i;
|
||||||
#define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
|
#define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
|
||||||
|
@ -97,8 +73,8 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||||
|
|
||||||
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
||||||
IndexType count = 0;
|
IndexType count = 0;
|
||||||
__m128i base = _mm_set1_epi16(0);
|
__m128i base = _mm_setzero_si128();
|
||||||
__m128i increment = _mm_set1_epi16(8);
|
const __m128i increment = _mm_set1_epi16(8);
|
||||||
for (IndexType i = 0; i < NumChunks; ++i)
|
for (IndexType i = 0; i < NumChunks; ++i)
|
||||||
{
|
{
|
||||||
// bitmask of nonzero values in this chunk
|
// bitmask of nonzero values in this chunk
|
||||||
|
|
Loading…
Add table
Reference in a new issue