mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
Optimize pop_1st_bit() on 32-bit x86
Operations on 64-bit Bitboard types are slow on 32-bit x86 when compiled with gcc, so optimize this case. Incidentally, profiling shows that pop_1st_bit() is a very performance-critical path! Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
parent
95ce27f926
commit
9ae2b69235
1 changed files with 80 additions and 58 deletions
138
src/bitboard.cpp
138
src/bitboard.cpp
|
@ -6,12 +6,12 @@
|
|||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
|
||||
Glaurung is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
@ -38,7 +38,7 @@ const Bitboard FileBB[8] = {
|
|||
};
|
||||
|
||||
/// NeighboringFilesBB[] has one entry per file. Each entry is the union of
/// the file bitboards on either side of that file; the first and last
/// entries have a single neighbor only (FileBBB and FileGBB respectively).
/// NOTE(review): index order is presumably file A..H -- confirm against
/// the File enum.
const Bitboard NeighboringFilesBB[8] = {
  FileBBB,         FileABB|FileCBB, FileBBB|FileDBB, FileCBB|FileEBB,
  FileDBB|FileFBB, FileEBB|FileGBB, FileFBB|FileHBB, FileGBB
};
|
||||
|
||||
|
@ -47,7 +47,7 @@ const Bitboard ThisAndNeighboringFilesBB[8] = {
|
|||
FileBBB|FileCBB|FileDBB, FileCBB|FileDBB|FileEBB,
|
||||
FileDBB|FileEBB|FileFBB, FileEBB|FileFBB|FileGBB,
|
||||
FileFBB|FileGBB|FileHBB, FileGBB|FileHBB
|
||||
};
|
||||
};
|
||||
|
||||
const Bitboard RankBB[8] = {
|
||||
Rank1BB, Rank2BB, Rank3BB, Rank4BB, Rank5BB, Rank6BB, Rank7BB, Rank8BB
|
||||
|
@ -126,27 +126,27 @@ const int RShift[64] = {
|
|||
#else // if defined(USE_32BIT_ATTACKS)
|
||||
|
||||
/// RMult[] holds one 64-bit multiplier per square (64 entries). This is the
/// variant compiled in the #else branch of USE_32BIT_ATTACKS.
/// NOTE(review): presumably the rook magic multipliers used to hash an
/// occupancy bitboard into the attack table -- confirm against the callers.
const uint64_t RMult[64] = {
  0xa8002c000108020ULL,  0x4440200140003000ULL, 0x8080200010011880ULL,
  0x380180080141000ULL,  0x1a00060008211044ULL, 0x410001000a0c0008ULL,
  0x9500060004008100ULL, 0x100024284a20700ULL,  0x802140008000ULL,
  0x80c01002a00840ULL,   0x402004282011020ULL,  0x9862000820420050ULL,
  0x1001448011100ULL,    0x6432800200800400ULL, 0x40100010002000cULL,
  0x2800d0010c080ULL,    0x90c0008000803042ULL, 0x4010004000200041ULL,
  0x3010010200040ULL,    0xa40828028001000ULL,  0x123010008000430ULL,
  0x24008004020080ULL,   0x60040001104802ULL,   0x582200028400d1ULL,
  0x4000802080044000ULL, 0x408208200420308ULL,  0x610038080102000ULL,
  0x3601000900100020ULL, 0x80080040180ULL,      0xc2020080040080ULL,
  0x80084400100102ULL,   0x4022408200014401ULL, 0x40052040800082ULL,
  0xb08200280804000ULL,  0x8a80a008801000ULL,   0x4000480080801000ULL,
  0x911808800801401ULL,  0x822a003002001894ULL, 0x401068091400108aULL,
  0x4a10a00004cULL,      0x2000800640008024ULL, 0x1486408102020020ULL,
  0x100a000d50041ULL,    0x810050020b0020ULL,   0x204000800808004ULL,
  0x20048100a000cULL,    0x112000831020004ULL,  0x9000040810002ULL,
  0x440490200208200ULL,  0x8910401000200040ULL, 0x6404200050008480ULL,
  0x4b824a2010010100ULL, 0x4080801810c0080ULL,  0x400802a0080ULL,
  0x8224080110026400ULL, 0x40002c4104088200ULL, 0x1002100104a0282ULL,
  0x1208400811048021ULL, 0x3201014a40d02001ULL, 0x5100019200501ULL,
  0x101000208001005ULL,  0x2008450080702ULL,    0x1002080301d00cULL,
  0x410201ce5c030092ULL
};
|
||||
|
||||
|
@ -190,7 +190,7 @@ const uint64_t BMult[64] = {
|
|||
0x881c7c67fcbfc4f6ULL, 0x47ca41e7e440d423ULL, 0xeb0c88112048d004ULL,
|
||||
0x51c60e04359aef1aULL, 0x1aa1fe0e957a5554ULL, 0xdd9448db4f5e3104ULL,
|
||||
0xdc01f6dca4bebbdcULL,
|
||||
};
|
||||
};
|
||||
|
||||
const int BShift[64] = {
|
||||
26, 27, 27, 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||
|
@ -202,27 +202,27 @@ const int BShift[64] = {
|
|||
#else // if defined(USE_32BIT_ATTACKS)
|
||||
|
||||
/// BMult[] holds one 64-bit multiplier per square (64 entries). This is the
/// variant compiled in the #else branch of USE_32BIT_ATTACKS.
/// NOTE(review): presumably the bishop magic multipliers used to hash an
/// occupancy bitboard into the attack table -- confirm against the callers.
const uint64_t BMult[64] = {
  0x440049104032280ULL,  0x1021023c82008040ULL, 0x404040082000048ULL,
  0x48c4440084048090ULL, 0x2801104026490000ULL, 0x4100880442040800ULL,
  0x181011002e06040ULL,  0x9101004104200e00ULL, 0x1240848848310401ULL,
  0x2000142828050024ULL, 0x1004024d5000ULL,     0x102044400800200ULL,
  0x8108108820112000ULL, 0xa880818210c00046ULL, 0x4008008801082000ULL,
  0x60882404049400ULL,   0x104402004240810ULL,  0xa002084250200ULL,
  0x100b0880801100ULL,   0x4080201220101ULL,    0x44008080a00000ULL,
  0x202200842000ULL,     0x5006004882d00808ULL, 0x200045080802ULL,
  0x86100020200601ULL,   0xa802080a20112c02ULL, 0x80411218080900ULL,
  0x200a0880080a0ULL,    0x9a01010000104000ULL, 0x28008003100080ULL,
  0x211021004480417ULL,  0x401004188220806ULL,  0x825051400c2006ULL,
  0x140c0210943000ULL,   0x242800300080ULL,     0xc2208120080200ULL,
  0x2430008200002200ULL, 0x1010100112008040ULL, 0x8141050100020842ULL,
  0x822081014405ULL,     0x800c049e40400804ULL, 0x4a0404028a000820ULL,
  0x22060201041200ULL,   0x360904200840801ULL,  0x881a08208800400ULL,
  0x60202c00400420ULL,   0x1204440086061400ULL, 0x8184042804040ULL,
  0x64040315300400ULL,   0xc01008801090a00ULL,  0x808010401140c00ULL,
  0x4004830c2020040ULL,  0x80005002020054ULL,   0x40000c14481a0490ULL,
  0x10500101042048ULL,   0x1010100200424000ULL, 0x640901901040ULL,
  0xa0201014840ULL,      0x840082aa011002ULL,   0x10010840084240aULL,
  0x420400810420608ULL,  0x8d40230408102100ULL, 0x4a00200612222409ULL,
  0xa08520292120600ULL
};
|
||||
|
||||
|
@ -320,9 +320,9 @@ void init_bitboards() {
|
|||
#if defined(USE_FOLDED_BITSCAN)
|
||||
|
||||
/// BitTable[] is the lookup table for the folded bitscan: the 6-bit value
/// (fold * 0x783a9b23) >> 26 indexes this table to obtain the square of the
/// least significant set bit. It must contain exactly 64 entries.
static const int BitTable[64] = {
  63, 30,  3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34,
  61, 29,  2, 51, 21, 43, 45, 10, 18, 47,  1, 54,  9, 57,  0, 35,
  62, 31, 40,  4, 49,  5, 52, 26, 60,  6, 23, 44, 46, 27, 56, 16,
   7, 39, 48, 24, 59, 14, 12, 55, 38, 28, 58, 20, 37, 17, 36,  8
};
|
||||
|
||||
|
@ -339,6 +339,26 @@ Square first_1(Bitboard b) {
|
|||
/// pop_1st_bit() finds and clears the least significant nonzero bit in a
|
||||
/// nonzero bitboard.
|
||||
|
||||
#if defined(USE_32BIT_ATTACKS)
|
||||
|
||||
/// pop_1st_bit(), variant built when USE_32BIT_ATTACKS is defined. It clears
/// the least significant set bit of *bb and returns its square, working on
/// one 32-bit half at a time so that no 64-bit subtraction is needed.
///
/// The halves are extracted and recombined with shifts instead of going
/// through a uint32_t* alias of the Bitboard: the pointer cast violated the
/// strict-aliasing rule and only addressed the right half on little-endian
/// machines. The values produced are the same as before.
/// NOTE(review): assumes Bitboard is a 64-bit unsigned integer type.

Square pop_1st_bit(Bitboard *bb) {

  const uint32_t lo = uint32_t(*bb);
  const uint32_t hi = uint32_t(*bb >> 32);
  uint32_t b;

  if (lo)
  {
      // Lowest set bit lives in the low half: clear it there, keep the
      // high half untouched.
      *bb = (Bitboard(hi) << 32) | Bitboard(lo & (lo - 1));
      b = lo ^ (lo - 1);
  }
  else
  {
      // Low half is empty: clear the lowest set bit of the high half. The
      // complement reproduces the value the 64-bit folded scan computes,
      // i.e. 0xFFFFFFFF ^ (hi ^ (hi - 1)).
      *bb = Bitboard(hi & (hi - 1)) << 32;
      b = ~(hi ^ (hi - 1));
  }

  return Square(BitTable[(b * 0x783a9b23) >> 26]);
}
|
||||
|
||||
#else
|
||||
|
||||
Square pop_1st_bit(Bitboard *b) {
|
||||
Bitboard bb = *b ^ (*b - 1);
|
||||
uint32_t fold = int(bb) ^ int(bb >> 32);
|
||||
|
@ -346,6 +366,8 @@ Square pop_1st_bit(Bitboard *b) {
|
|||
return Square(BitTable[(fold * 0x783a9b23) >> 26]);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
static const int BitTable[64] = {
|
||||
|
@ -369,7 +391,7 @@ Square first_1(Bitboard b) {
|
|||
/// pop_1st_bit(), variant used when USE_FOLDED_BITSCAN is not defined. It
/// clears the least significant set bit of *b and returns its square.
/// The isolated lowest bit (bb & -bb) is multiplied by a 64-bit constant and
/// the top six bits of the product index BitTable[].
Square pop_1st_bit(Bitboard *b) {
  const Bitboard bb = *b;

  *b = bb & (bb - 1);  // drop the lowest set bit from the caller's bitboard

  return Square(BitTable[((bb & -bb) * 0x218a392cd3d5dbfULL) >> 58]);
}
|
||||
|
||||
#endif // defined(USE_FOLDED_BITSCAN)
|
||||
|
@ -417,7 +439,7 @@ namespace {
|
|||
{-7,-9,0}, {17,15,10,6,-6,-10,-15,-17}, {9,7,-7,-9,0}, {8,1,-1,-8,0},
|
||||
{9,7,-7,-9,8,1,-1,-8}, {9,7,-7,-9,8,1,-1,-8}
|
||||
};
|
||||
|
||||
|
||||
for(i = 0; i < 64; i++) {
|
||||
for(j = 0; j <= int(BK); j++) {
|
||||
StepAttackBB[j][i] = EmptyBoardBB;
|
||||
|
@ -483,14 +505,14 @@ namespace {
|
|||
Bitboard b;
|
||||
for(i = 0; i < 64; i++) {
|
||||
attackIndex[i] = index;
|
||||
mask[i] = sliding_attacks(i, 0ULL, 4, deltas, 1, 6, 1, 6);
|
||||
mask[i] = sliding_attacks(i, 0ULL, 4, deltas, 1, 6, 1, 6);
|
||||
j = (1 << (64 - shift[i]));
|
||||
for(k = 0; k < j; k++) {
|
||||
#if defined(USE_32BIT_ATTACKS)
|
||||
b = index_to_bitboard(k, mask[i]);
|
||||
attacks[index +
|
||||
(unsigned(int(b) * int(mult[i]) ^
|
||||
int(b >> 32) * int(mult[i] >> 32))
|
||||
attacks[index +
|
||||
(unsigned(int(b) * int(mult[i]) ^
|
||||
int(b >> 32) * int(mult[i] >> 32))
|
||||
>> shift[i])] =
|
||||
sliding_attacks(i, b, 4, deltas);
|
||||
#else
|
||||
|
@ -502,7 +524,7 @@ namespace {
|
|||
index += j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void init_pseudo_attacks() {
|
||||
Square s;
|
||||
|
@ -537,5 +559,5 @@ namespace {
|
|||
}
|
||||
}
|
||||
#endif // defined(USE_COMPACT_ROOK_ATTACKS)
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue