1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-29 16:23:09 +00:00

Optimize pop_1st_bit() take 2

This time we use MSVC intrinsics, which are
C wrappers for the Intel assembler 'bsf' instruction.

The speed-up in node count is around 3%, so it is
probably not worth the effort. Anyway, this patch
can be useful at least for documentation purposes.

This optimization covers 32 bit systems only.

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
Marco Costalba 2008-09-21 22:13:03 +01:00
parent 01dd46a309
commit 2a3ebc884e

View file

@ -21,6 +21,16 @@
//// Includes
////
#ifdef _MSC_VER
#include <intrin.h>
#ifdef _WIN64
#pragma intrinsic(_BitScanForward64)
#else
#pragma intrinsic(_BitScanForward)
#endif
#define USING_INTRINSICS
#endif
#include <iostream>
#include "bitboard.h"
@ -339,20 +349,30 @@ Square first_1(Bitboard b) {
/// pop_1st_bit() finds and clears the least significant nonzero bit in a
/// nonzero bitboard.
#if defined(USE_32BIT_ATTACKS) && defined(_WIN32)
#if defined(USE_32BIT_ATTACKS) && defined(_MSC_VER)
// NOTE(review): this span comes from a diff rendering without +/- markers.
// Two versions of pop_1st_bit() are interleaved below: one taking `bb` that
// uses a multiply-and-lookup into BitTable, and one taking `b` that uses the
// _BitScanForward intrinsic. It is not a single compilable definition as
// shown; the comments below describe each statement on its own.
Square pop_1st_bit(Bitboard *bb) {
// On 32bit system compiled with MSVC this version seems
// slightly faster than the standard one.
// a: low 32 bits of the bitboard (truncating conversion).
uint32_t a = uint32_t(*bb);
// ptr: the first 32-bit word if `a` is nonzero, otherwise the second word.
uint32_t* ptr = a ? (uint32_t*)bb : (uint32_t*)bb + 1; // Little endian only?
// b: the 32-bit word that will be searched for its lowest set bit.
uint32_t b = a ? a : *ptr;
// b ^ (b - 1) sets the lowest set bit of b and every bit below it;
// the complement c therefore keeps only the bits above that bit.
uint32_t c = ~(b ^ (b - 1));
// Second function header: start of the variant that takes `b`.
Square pop_1st_bit(Bitboard *b) {
*ptr = b & c; // clear the bit
// c is complemented only when the bit came from the first word, so the
// value hashed below differs between the two words.
// NOTE(review): presumably this is how BitTable tells the two 32-bit halves
// apart; BitTable is defined elsewhere, confirm there.
if (a)
c = ~c;
// index: receives the bit position written by _BitScanForward.
unsigned long index;
// l / h: pointers to the first and second 32-bit words of *b.
uint32_t *l, *h;
// Multiply by a magic constant and keep the top 6 bits of the 32-bit
// product as the index into BitTable.
return Square(BitTable[(c * 0x783a9b23) >> 26]);
// First word nonzero: scan it and clear the bit that was found.
if (*(l = (uint32_t*)b) != 0)
{
_BitScanForward(&index, *l);
// NOTE(review): `1` is a signed int, so index == 31 shifts into the sign
// bit; consider an unsigned literal (1u) - confirm before changing.
*l &= ~(1 << index);
}
// Otherwise try the second word.
else if (*(h = (uint32_t*)b + 1) != 0)
{
_BitScanForward(&index, *h);
*h &= ~(1 << index);
// The bit was found in the second word, so offset its position by 32.
index += 32;
} else
// Both words are zero: there is no bit to pop.
return SQ_NONE;
return Square(index);
}
#else