1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-29 16:23:09 +00:00

Optimize pop_1st_bit() on 32 bits x86

Operations on 64 bits Bitboard types are slow
on x86 compiled with gcc, so optimize this case.

BTW profiling shows that pop_1st_bit() is a
veeery performance critical path!

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
Marco Costalba 2008-09-18 16:09:19 +02:00
parent 95ce27f926
commit 9ae2b69235

View file

@ -6,12 +6,12 @@
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Glaurung is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
@ -38,7 +38,7 @@ const Bitboard FileBB[8] = {
};
const Bitboard NeighboringFilesBB[8] = {
FileBBB, FileABB|FileCBB, FileBBB|FileDBB, FileCBB|FileEBB,
FileBBB, FileABB|FileCBB, FileBBB|FileDBB, FileCBB|FileEBB,
FileDBB|FileFBB, FileEBB|FileGBB, FileFBB|FileHBB, FileGBB
};
@ -47,7 +47,7 @@ const Bitboard ThisAndNeighboringFilesBB[8] = {
FileBBB|FileCBB|FileDBB, FileCBB|FileDBB|FileEBB,
FileDBB|FileEBB|FileFBB, FileEBB|FileFBB|FileGBB,
FileFBB|FileGBB|FileHBB, FileGBB|FileHBB
};
};
const Bitboard RankBB[8] = {
Rank1BB, Rank2BB, Rank3BB, Rank4BB, Rank5BB, Rank6BB, Rank7BB, Rank8BB
@ -126,27 +126,27 @@ const int RShift[64] = {
#else // if defined(USE_32BIT_ATTACKS)
const uint64_t RMult[64] = {
0xa8002c000108020ULL, 0x4440200140003000ULL, 0x8080200010011880ULL,
0x380180080141000ULL, 0x1a00060008211044ULL, 0x410001000a0c0008ULL,
0x9500060004008100ULL, 0x100024284a20700ULL, 0x802140008000ULL,
0x80c01002a00840ULL, 0x402004282011020ULL, 0x9862000820420050ULL,
0x1001448011100ULL, 0x6432800200800400ULL, 0x40100010002000cULL,
0x2800d0010c080ULL, 0x90c0008000803042ULL, 0x4010004000200041ULL,
0x3010010200040ULL, 0xa40828028001000ULL, 0x123010008000430ULL,
0x24008004020080ULL, 0x60040001104802ULL, 0x582200028400d1ULL,
0x4000802080044000ULL, 0x408208200420308ULL, 0x610038080102000ULL,
0x3601000900100020ULL, 0x80080040180ULL, 0xc2020080040080ULL,
0x80084400100102ULL, 0x4022408200014401ULL, 0x40052040800082ULL,
0xb08200280804000ULL, 0x8a80a008801000ULL, 0x4000480080801000ULL,
0x911808800801401ULL, 0x822a003002001894ULL, 0x401068091400108aULL,
0x4a10a00004cULL, 0x2000800640008024ULL, 0x1486408102020020ULL,
0x100a000d50041ULL, 0x810050020b0020ULL, 0x204000800808004ULL,
0x20048100a000cULL, 0x112000831020004ULL, 0x9000040810002ULL,
0x440490200208200ULL, 0x8910401000200040ULL, 0x6404200050008480ULL,
0x4b824a2010010100ULL, 0x4080801810c0080ULL, 0x400802a0080ULL,
0x8224080110026400ULL, 0x40002c4104088200ULL, 0x1002100104a0282ULL,
0x1208400811048021ULL, 0x3201014a40d02001ULL, 0x5100019200501ULL,
0x101000208001005ULL, 0x2008450080702ULL, 0x1002080301d00cULL,
0xa8002c000108020ULL, 0x4440200140003000ULL, 0x8080200010011880ULL,
0x380180080141000ULL, 0x1a00060008211044ULL, 0x410001000a0c0008ULL,
0x9500060004008100ULL, 0x100024284a20700ULL, 0x802140008000ULL,
0x80c01002a00840ULL, 0x402004282011020ULL, 0x9862000820420050ULL,
0x1001448011100ULL, 0x6432800200800400ULL, 0x40100010002000cULL,
0x2800d0010c080ULL, 0x90c0008000803042ULL, 0x4010004000200041ULL,
0x3010010200040ULL, 0xa40828028001000ULL, 0x123010008000430ULL,
0x24008004020080ULL, 0x60040001104802ULL, 0x582200028400d1ULL,
0x4000802080044000ULL, 0x408208200420308ULL, 0x610038080102000ULL,
0x3601000900100020ULL, 0x80080040180ULL, 0xc2020080040080ULL,
0x80084400100102ULL, 0x4022408200014401ULL, 0x40052040800082ULL,
0xb08200280804000ULL, 0x8a80a008801000ULL, 0x4000480080801000ULL,
0x911808800801401ULL, 0x822a003002001894ULL, 0x401068091400108aULL,
0x4a10a00004cULL, 0x2000800640008024ULL, 0x1486408102020020ULL,
0x100a000d50041ULL, 0x810050020b0020ULL, 0x204000800808004ULL,
0x20048100a000cULL, 0x112000831020004ULL, 0x9000040810002ULL,
0x440490200208200ULL, 0x8910401000200040ULL, 0x6404200050008480ULL,
0x4b824a2010010100ULL, 0x4080801810c0080ULL, 0x400802a0080ULL,
0x8224080110026400ULL, 0x40002c4104088200ULL, 0x1002100104a0282ULL,
0x1208400811048021ULL, 0x3201014a40d02001ULL, 0x5100019200501ULL,
0x101000208001005ULL, 0x2008450080702ULL, 0x1002080301d00cULL,
0x410201ce5c030092ULL
};
@ -190,7 +190,7 @@ const uint64_t BMult[64] = {
0x881c7c67fcbfc4f6ULL, 0x47ca41e7e440d423ULL, 0xeb0c88112048d004ULL,
0x51c60e04359aef1aULL, 0x1aa1fe0e957a5554ULL, 0xdd9448db4f5e3104ULL,
0xdc01f6dca4bebbdcULL,
};
};
const int BShift[64] = {
26, 27, 27, 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27,
@ -202,27 +202,27 @@ const int BShift[64] = {
#else // if defined(USE_32BIT_ATTACKS)
const uint64_t BMult[64] = {
0x440049104032280ULL, 0x1021023c82008040ULL, 0x404040082000048ULL,
0x48c4440084048090ULL, 0x2801104026490000ULL, 0x4100880442040800ULL,
0x181011002e06040ULL, 0x9101004104200e00ULL, 0x1240848848310401ULL,
0x2000142828050024ULL, 0x1004024d5000ULL, 0x102044400800200ULL,
0x8108108820112000ULL, 0xa880818210c00046ULL, 0x4008008801082000ULL,
0x60882404049400ULL, 0x104402004240810ULL, 0xa002084250200ULL,
0x100b0880801100ULL, 0x4080201220101ULL, 0x44008080a00000ULL,
0x202200842000ULL, 0x5006004882d00808ULL, 0x200045080802ULL,
0x86100020200601ULL, 0xa802080a20112c02ULL, 0x80411218080900ULL,
0x200a0880080a0ULL, 0x9a01010000104000ULL, 0x28008003100080ULL,
0x211021004480417ULL, 0x401004188220806ULL, 0x825051400c2006ULL,
0x140c0210943000ULL, 0x242800300080ULL, 0xc2208120080200ULL,
0x2430008200002200ULL, 0x1010100112008040ULL, 0x8141050100020842ULL,
0x822081014405ULL, 0x800c049e40400804ULL, 0x4a0404028a000820ULL,
0x22060201041200ULL, 0x360904200840801ULL, 0x881a08208800400ULL,
0x60202c00400420ULL, 0x1204440086061400ULL, 0x8184042804040ULL,
0x64040315300400ULL, 0xc01008801090a00ULL, 0x808010401140c00ULL,
0x4004830c2020040ULL, 0x80005002020054ULL, 0x40000c14481a0490ULL,
0x10500101042048ULL, 0x1010100200424000ULL, 0x640901901040ULL,
0xa0201014840ULL, 0x840082aa011002ULL, 0x10010840084240aULL,
0x420400810420608ULL, 0x8d40230408102100ULL, 0x4a00200612222409ULL,
0x440049104032280ULL, 0x1021023c82008040ULL, 0x404040082000048ULL,
0x48c4440084048090ULL, 0x2801104026490000ULL, 0x4100880442040800ULL,
0x181011002e06040ULL, 0x9101004104200e00ULL, 0x1240848848310401ULL,
0x2000142828050024ULL, 0x1004024d5000ULL, 0x102044400800200ULL,
0x8108108820112000ULL, 0xa880818210c00046ULL, 0x4008008801082000ULL,
0x60882404049400ULL, 0x104402004240810ULL, 0xa002084250200ULL,
0x100b0880801100ULL, 0x4080201220101ULL, 0x44008080a00000ULL,
0x202200842000ULL, 0x5006004882d00808ULL, 0x200045080802ULL,
0x86100020200601ULL, 0xa802080a20112c02ULL, 0x80411218080900ULL,
0x200a0880080a0ULL, 0x9a01010000104000ULL, 0x28008003100080ULL,
0x211021004480417ULL, 0x401004188220806ULL, 0x825051400c2006ULL,
0x140c0210943000ULL, 0x242800300080ULL, 0xc2208120080200ULL,
0x2430008200002200ULL, 0x1010100112008040ULL, 0x8141050100020842ULL,
0x822081014405ULL, 0x800c049e40400804ULL, 0x4a0404028a000820ULL,
0x22060201041200ULL, 0x360904200840801ULL, 0x881a08208800400ULL,
0x60202c00400420ULL, 0x1204440086061400ULL, 0x8184042804040ULL,
0x64040315300400ULL, 0xc01008801090a00ULL, 0x808010401140c00ULL,
0x4004830c2020040ULL, 0x80005002020054ULL, 0x40000c14481a0490ULL,
0x10500101042048ULL, 0x1010100200424000ULL, 0x640901901040ULL,
0xa0201014840ULL, 0x840082aa011002ULL, 0x10010840084240aULL,
0x420400810420608ULL, 0x8d40230408102100ULL, 0x4a00200612222409ULL,
0xa08520292120600ULL
};
@ -320,9 +320,9 @@ void init_bitboards() {
#if defined(USE_FOLDED_BITSCAN)
static const int BitTable[64] = {
63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2,
51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52,
26, 60, 6, 23, 44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28,
63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2,
51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52,
26, 60, 6, 23, 44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28,
58, 20, 37, 17, 36, 8
};
@ -339,6 +339,26 @@ Square first_1(Bitboard b) {
/// pop_1st_bit() finds and clears the least significant nonzero bit in a
/// nonzero bitboard.
#if defined(USE_32BIT_ATTACKS)
Square pop_1st_bit(Bitboard *bb) {
uint32_t t = uint32_t(*bb);
uint32_t* p = t ? (uint32_t*)bb : (uint32_t*)bb + 1; // Little endian only?
uint32_t b = t ? t : *p;
*p = b & (b -1);
if (t)
b ^= (b - 1);
else
b = ~(b ^ (b - 1));
return Square(BitTable[(b * 0x783a9b23) >> 26]);
}
#else
Square pop_1st_bit(Bitboard *b) {
Bitboard bb = *b ^ (*b - 1);
uint32_t fold = int(bb) ^ int(bb >> 32);
@ -346,6 +366,8 @@ Square pop_1st_bit(Bitboard *b) {
return Square(BitTable[(fold * 0x783a9b23) >> 26]);
}
#endif
#else
static const int BitTable[64] = {
@ -369,7 +391,7 @@ Square first_1(Bitboard b) {
Square pop_1st_bit(Bitboard *b) {
Bitboard bb = *b;
*b &= (*b - 1);
return Square(BitTable[((bb & -bb) * 0x218a392cd3d5dbfULL) >> 58]);
return Square(BitTable[((bb & -bb) * 0x218a392cd3d5dbfULL) >> 58]);
}
#endif // defined(USE_FOLDED_BITSCAN)
@ -417,7 +439,7 @@ namespace {
{-7,-9,0}, {17,15,10,6,-6,-10,-15,-17}, {9,7,-7,-9,0}, {8,1,-1,-8,0},
{9,7,-7,-9,8,1,-1,-8}, {9,7,-7,-9,8,1,-1,-8}
};
for(i = 0; i < 64; i++) {
for(j = 0; j <= int(BK); j++) {
StepAttackBB[j][i] = EmptyBoardBB;
@ -483,14 +505,14 @@ namespace {
Bitboard b;
for(i = 0; i < 64; i++) {
attackIndex[i] = index;
mask[i] = sliding_attacks(i, 0ULL, 4, deltas, 1, 6, 1, 6);
mask[i] = sliding_attacks(i, 0ULL, 4, deltas, 1, 6, 1, 6);
j = (1 << (64 - shift[i]));
for(k = 0; k < j; k++) {
#if defined(USE_32BIT_ATTACKS)
b = index_to_bitboard(k, mask[i]);
attacks[index +
(unsigned(int(b) * int(mult[i]) ^
int(b >> 32) * int(mult[i] >> 32))
attacks[index +
(unsigned(int(b) * int(mult[i]) ^
int(b >> 32) * int(mult[i] >> 32))
>> shift[i])] =
sliding_attacks(i, b, 4, deltas);
#else
@ -502,7 +524,7 @@ namespace {
index += j;
}
}
void init_pseudo_attacks() {
Square s;
@ -537,5 +559,5 @@ namespace {
}
}
#endif // defined(USE_COMPACT_ROOK_ATTACKS)
}