1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-07-11 19:49:14 +00:00

Optimize pop_1st_bit() on 32 bits x86

Operations on 64 bits Bitboard types are slow
on x86 compiled with gcc, so optimize this case.

BTW profiling shows that pop_1st_bit() is a
veeery performance critical path!

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
Marco Costalba 2008-09-18 16:09:19 +02:00
parent 95ce27f926
commit 9ae2b69235

View file

@ -6,12 +6,12 @@
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or
(at your option) any later version. (at your option) any later version.
Glaurung is distributed in the hope that it will be useful, Glaurung is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
@ -38,7 +38,7 @@ const Bitboard FileBB[8] = {
}; };
const Bitboard NeighboringFilesBB[8] = { const Bitboard NeighboringFilesBB[8] = {
FileBBB, FileABB|FileCBB, FileBBB|FileDBB, FileCBB|FileEBB, FileBBB, FileABB|FileCBB, FileBBB|FileDBB, FileCBB|FileEBB,
FileDBB|FileFBB, FileEBB|FileGBB, FileFBB|FileHBB, FileGBB FileDBB|FileFBB, FileEBB|FileGBB, FileFBB|FileHBB, FileGBB
}; };
@ -47,7 +47,7 @@ const Bitboard ThisAndNeighboringFilesBB[8] = {
FileBBB|FileCBB|FileDBB, FileCBB|FileDBB|FileEBB, FileBBB|FileCBB|FileDBB, FileCBB|FileDBB|FileEBB,
FileDBB|FileEBB|FileFBB, FileEBB|FileFBB|FileGBB, FileDBB|FileEBB|FileFBB, FileEBB|FileFBB|FileGBB,
FileFBB|FileGBB|FileHBB, FileGBB|FileHBB FileFBB|FileGBB|FileHBB, FileGBB|FileHBB
}; };
const Bitboard RankBB[8] = { const Bitboard RankBB[8] = {
Rank1BB, Rank2BB, Rank3BB, Rank4BB, Rank5BB, Rank6BB, Rank7BB, Rank8BB Rank1BB, Rank2BB, Rank3BB, Rank4BB, Rank5BB, Rank6BB, Rank7BB, Rank8BB
@ -126,27 +126,27 @@ const int RShift[64] = {
#else // if defined(USE_32BIT_ATTACKS) #else // if defined(USE_32BIT_ATTACKS)
const uint64_t RMult[64] = { const uint64_t RMult[64] = {
0xa8002c000108020ULL, 0x4440200140003000ULL, 0x8080200010011880ULL, 0xa8002c000108020ULL, 0x4440200140003000ULL, 0x8080200010011880ULL,
0x380180080141000ULL, 0x1a00060008211044ULL, 0x410001000a0c0008ULL, 0x380180080141000ULL, 0x1a00060008211044ULL, 0x410001000a0c0008ULL,
0x9500060004008100ULL, 0x100024284a20700ULL, 0x802140008000ULL, 0x9500060004008100ULL, 0x100024284a20700ULL, 0x802140008000ULL,
0x80c01002a00840ULL, 0x402004282011020ULL, 0x9862000820420050ULL, 0x80c01002a00840ULL, 0x402004282011020ULL, 0x9862000820420050ULL,
0x1001448011100ULL, 0x6432800200800400ULL, 0x40100010002000cULL, 0x1001448011100ULL, 0x6432800200800400ULL, 0x40100010002000cULL,
0x2800d0010c080ULL, 0x90c0008000803042ULL, 0x4010004000200041ULL, 0x2800d0010c080ULL, 0x90c0008000803042ULL, 0x4010004000200041ULL,
0x3010010200040ULL, 0xa40828028001000ULL, 0x123010008000430ULL, 0x3010010200040ULL, 0xa40828028001000ULL, 0x123010008000430ULL,
0x24008004020080ULL, 0x60040001104802ULL, 0x582200028400d1ULL, 0x24008004020080ULL, 0x60040001104802ULL, 0x582200028400d1ULL,
0x4000802080044000ULL, 0x408208200420308ULL, 0x610038080102000ULL, 0x4000802080044000ULL, 0x408208200420308ULL, 0x610038080102000ULL,
0x3601000900100020ULL, 0x80080040180ULL, 0xc2020080040080ULL, 0x3601000900100020ULL, 0x80080040180ULL, 0xc2020080040080ULL,
0x80084400100102ULL, 0x4022408200014401ULL, 0x40052040800082ULL, 0x80084400100102ULL, 0x4022408200014401ULL, 0x40052040800082ULL,
0xb08200280804000ULL, 0x8a80a008801000ULL, 0x4000480080801000ULL, 0xb08200280804000ULL, 0x8a80a008801000ULL, 0x4000480080801000ULL,
0x911808800801401ULL, 0x822a003002001894ULL, 0x401068091400108aULL, 0x911808800801401ULL, 0x822a003002001894ULL, 0x401068091400108aULL,
0x4a10a00004cULL, 0x2000800640008024ULL, 0x1486408102020020ULL, 0x4a10a00004cULL, 0x2000800640008024ULL, 0x1486408102020020ULL,
0x100a000d50041ULL, 0x810050020b0020ULL, 0x204000800808004ULL, 0x100a000d50041ULL, 0x810050020b0020ULL, 0x204000800808004ULL,
0x20048100a000cULL, 0x112000831020004ULL, 0x9000040810002ULL, 0x20048100a000cULL, 0x112000831020004ULL, 0x9000040810002ULL,
0x440490200208200ULL, 0x8910401000200040ULL, 0x6404200050008480ULL, 0x440490200208200ULL, 0x8910401000200040ULL, 0x6404200050008480ULL,
0x4b824a2010010100ULL, 0x4080801810c0080ULL, 0x400802a0080ULL, 0x4b824a2010010100ULL, 0x4080801810c0080ULL, 0x400802a0080ULL,
0x8224080110026400ULL, 0x40002c4104088200ULL, 0x1002100104a0282ULL, 0x8224080110026400ULL, 0x40002c4104088200ULL, 0x1002100104a0282ULL,
0x1208400811048021ULL, 0x3201014a40d02001ULL, 0x5100019200501ULL, 0x1208400811048021ULL, 0x3201014a40d02001ULL, 0x5100019200501ULL,
0x101000208001005ULL, 0x2008450080702ULL, 0x1002080301d00cULL, 0x101000208001005ULL, 0x2008450080702ULL, 0x1002080301d00cULL,
0x410201ce5c030092ULL 0x410201ce5c030092ULL
}; };
@ -190,7 +190,7 @@ const uint64_t BMult[64] = {
0x881c7c67fcbfc4f6ULL, 0x47ca41e7e440d423ULL, 0xeb0c88112048d004ULL, 0x881c7c67fcbfc4f6ULL, 0x47ca41e7e440d423ULL, 0xeb0c88112048d004ULL,
0x51c60e04359aef1aULL, 0x1aa1fe0e957a5554ULL, 0xdd9448db4f5e3104ULL, 0x51c60e04359aef1aULL, 0x1aa1fe0e957a5554ULL, 0xdd9448db4f5e3104ULL,
0xdc01f6dca4bebbdcULL, 0xdc01f6dca4bebbdcULL,
}; };
const int BShift[64] = { const int BShift[64] = {
26, 27, 27, 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27,
@ -202,27 +202,27 @@ const int BShift[64] = {
#else // if defined(USE_32BIT_ATTACKS) #else // if defined(USE_32BIT_ATTACKS)
const uint64_t BMult[64] = { const uint64_t BMult[64] = {
0x440049104032280ULL, 0x1021023c82008040ULL, 0x404040082000048ULL, 0x440049104032280ULL, 0x1021023c82008040ULL, 0x404040082000048ULL,
0x48c4440084048090ULL, 0x2801104026490000ULL, 0x4100880442040800ULL, 0x48c4440084048090ULL, 0x2801104026490000ULL, 0x4100880442040800ULL,
0x181011002e06040ULL, 0x9101004104200e00ULL, 0x1240848848310401ULL, 0x181011002e06040ULL, 0x9101004104200e00ULL, 0x1240848848310401ULL,
0x2000142828050024ULL, 0x1004024d5000ULL, 0x102044400800200ULL, 0x2000142828050024ULL, 0x1004024d5000ULL, 0x102044400800200ULL,
0x8108108820112000ULL, 0xa880818210c00046ULL, 0x4008008801082000ULL, 0x8108108820112000ULL, 0xa880818210c00046ULL, 0x4008008801082000ULL,
0x60882404049400ULL, 0x104402004240810ULL, 0xa002084250200ULL, 0x60882404049400ULL, 0x104402004240810ULL, 0xa002084250200ULL,
0x100b0880801100ULL, 0x4080201220101ULL, 0x44008080a00000ULL, 0x100b0880801100ULL, 0x4080201220101ULL, 0x44008080a00000ULL,
0x202200842000ULL, 0x5006004882d00808ULL, 0x200045080802ULL, 0x202200842000ULL, 0x5006004882d00808ULL, 0x200045080802ULL,
0x86100020200601ULL, 0xa802080a20112c02ULL, 0x80411218080900ULL, 0x86100020200601ULL, 0xa802080a20112c02ULL, 0x80411218080900ULL,
0x200a0880080a0ULL, 0x9a01010000104000ULL, 0x28008003100080ULL, 0x200a0880080a0ULL, 0x9a01010000104000ULL, 0x28008003100080ULL,
0x211021004480417ULL, 0x401004188220806ULL, 0x825051400c2006ULL, 0x211021004480417ULL, 0x401004188220806ULL, 0x825051400c2006ULL,
0x140c0210943000ULL, 0x242800300080ULL, 0xc2208120080200ULL, 0x140c0210943000ULL, 0x242800300080ULL, 0xc2208120080200ULL,
0x2430008200002200ULL, 0x1010100112008040ULL, 0x8141050100020842ULL, 0x2430008200002200ULL, 0x1010100112008040ULL, 0x8141050100020842ULL,
0x822081014405ULL, 0x800c049e40400804ULL, 0x4a0404028a000820ULL, 0x822081014405ULL, 0x800c049e40400804ULL, 0x4a0404028a000820ULL,
0x22060201041200ULL, 0x360904200840801ULL, 0x881a08208800400ULL, 0x22060201041200ULL, 0x360904200840801ULL, 0x881a08208800400ULL,
0x60202c00400420ULL, 0x1204440086061400ULL, 0x8184042804040ULL, 0x60202c00400420ULL, 0x1204440086061400ULL, 0x8184042804040ULL,
0x64040315300400ULL, 0xc01008801090a00ULL, 0x808010401140c00ULL, 0x64040315300400ULL, 0xc01008801090a00ULL, 0x808010401140c00ULL,
0x4004830c2020040ULL, 0x80005002020054ULL, 0x40000c14481a0490ULL, 0x4004830c2020040ULL, 0x80005002020054ULL, 0x40000c14481a0490ULL,
0x10500101042048ULL, 0x1010100200424000ULL, 0x640901901040ULL, 0x10500101042048ULL, 0x1010100200424000ULL, 0x640901901040ULL,
0xa0201014840ULL, 0x840082aa011002ULL, 0x10010840084240aULL, 0xa0201014840ULL, 0x840082aa011002ULL, 0x10010840084240aULL,
0x420400810420608ULL, 0x8d40230408102100ULL, 0x4a00200612222409ULL, 0x420400810420608ULL, 0x8d40230408102100ULL, 0x4a00200612222409ULL,
0xa08520292120600ULL 0xa08520292120600ULL
}; };
@ -320,9 +320,9 @@ void init_bitboards() {
#if defined(USE_FOLDED_BITSCAN) #if defined(USE_FOLDED_BITSCAN)
static const int BitTable[64] = { static const int BitTable[64] = {
63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2, 63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2,
51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52, 51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52,
26, 60, 6, 23, 44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28, 26, 60, 6, 23, 44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28,
58, 20, 37, 17, 36, 8 58, 20, 37, 17, 36, 8
}; };
@ -339,6 +339,26 @@ Square first_1(Bitboard b) {
/// pop_1st_bit() finds and clears the least significant nonzero bit in a /// pop_1st_bit() finds and clears the least significant nonzero bit in a
/// nonzero bitboard. /// nonzero bitboard.
#if defined(USE_32BIT_ATTACKS)
Square pop_1st_bit(Bitboard *bb) {
uint32_t t = uint32_t(*bb);
uint32_t* p = t ? (uint32_t*)bb : (uint32_t*)bb + 1; // Little endian only?
uint32_t b = t ? t : *p;
*p = b & (b -1);
if (t)
b ^= (b - 1);
else
b = ~(b ^ (b - 1));
return Square(BitTable[(b * 0x783a9b23) >> 26]);
}
#else
Square pop_1st_bit(Bitboard *b) { Square pop_1st_bit(Bitboard *b) {
Bitboard bb = *b ^ (*b - 1); Bitboard bb = *b ^ (*b - 1);
uint32_t fold = int(bb) ^ int(bb >> 32); uint32_t fold = int(bb) ^ int(bb >> 32);
@ -346,6 +366,8 @@ Square pop_1st_bit(Bitboard *b) {
return Square(BitTable[(fold * 0x783a9b23) >> 26]); return Square(BitTable[(fold * 0x783a9b23) >> 26]);
} }
#endif
#else #else
static const int BitTable[64] = { static const int BitTable[64] = {
@ -369,7 +391,7 @@ Square first_1(Bitboard b) {
Square pop_1st_bit(Bitboard *b) { Square pop_1st_bit(Bitboard *b) {
Bitboard bb = *b; Bitboard bb = *b;
*b &= (*b - 1); *b &= (*b - 1);
return Square(BitTable[((bb & -bb) * 0x218a392cd3d5dbfULL) >> 58]); return Square(BitTable[((bb & -bb) * 0x218a392cd3d5dbfULL) >> 58]);
} }
#endif // defined(USE_FOLDED_BITSCAN) #endif // defined(USE_FOLDED_BITSCAN)
@ -417,7 +439,7 @@ namespace {
{-7,-9,0}, {17,15,10,6,-6,-10,-15,-17}, {9,7,-7,-9,0}, {8,1,-1,-8,0}, {-7,-9,0}, {17,15,10,6,-6,-10,-15,-17}, {9,7,-7,-9,0}, {8,1,-1,-8,0},
{9,7,-7,-9,8,1,-1,-8}, {9,7,-7,-9,8,1,-1,-8} {9,7,-7,-9,8,1,-1,-8}, {9,7,-7,-9,8,1,-1,-8}
}; };
for(i = 0; i < 64; i++) { for(i = 0; i < 64; i++) {
for(j = 0; j <= int(BK); j++) { for(j = 0; j <= int(BK); j++) {
StepAttackBB[j][i] = EmptyBoardBB; StepAttackBB[j][i] = EmptyBoardBB;
@ -483,14 +505,14 @@ namespace {
Bitboard b; Bitboard b;
for(i = 0; i < 64; i++) { for(i = 0; i < 64; i++) {
attackIndex[i] = index; attackIndex[i] = index;
mask[i] = sliding_attacks(i, 0ULL, 4, deltas, 1, 6, 1, 6); mask[i] = sliding_attacks(i, 0ULL, 4, deltas, 1, 6, 1, 6);
j = (1 << (64 - shift[i])); j = (1 << (64 - shift[i]));
for(k = 0; k < j; k++) { for(k = 0; k < j; k++) {
#if defined(USE_32BIT_ATTACKS) #if defined(USE_32BIT_ATTACKS)
b = index_to_bitboard(k, mask[i]); b = index_to_bitboard(k, mask[i]);
attacks[index + attacks[index +
(unsigned(int(b) * int(mult[i]) ^ (unsigned(int(b) * int(mult[i]) ^
int(b >> 32) * int(mult[i] >> 32)) int(b >> 32) * int(mult[i] >> 32))
>> shift[i])] = >> shift[i])] =
sliding_attacks(i, b, 4, deltas); sliding_attacks(i, b, 4, deltas);
#else #else
@ -502,7 +524,7 @@ namespace {
index += j; index += j;
} }
} }
void init_pseudo_attacks() { void init_pseudo_attacks() {
Square s; Square s;
@ -537,5 +559,5 @@ namespace {
} }
} }
#endif // defined(USE_COMPACT_ROOK_ATTACKS) #endif // defined(USE_COMPACT_ROOK_ATTACKS)
} }