1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-30 08:43:09 +00:00

Merge hardware POPCNT detection and use

Tests on Joona luxury iCore7 QUAD show that speed increase
against standrd 64bit routine is between 3% and 4%.

So it seems a good thing to have. Also the user feedback at
startup regarding the compile and the hardware detection can
be an useful debug tool.

No functional change.

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
Marco Costalba 2009-05-25 07:52:59 +01:00
commit 3d0b60b065
10 changed files with 233 additions and 87 deletions

View file

@ -35,6 +35,7 @@
#include <iostream>
#include "bitboard.h"
#include "bitcount.h"
#include "direction.h"
@ -339,7 +340,7 @@ Square pop_1st_bit(Bitboard *b) {
#endif
#else
#else // defined(USE_FOLDED_BITSCAN)
static const int BitTable[64] = {
0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9, 34, 20, 40, 5, 17, 26, 38, 15,

View file

@ -22,7 +22,6 @@
#if !defined(BITBOARD_H_INCLUDED)
#define BITBOARD_H_INCLUDED
////
//// Defines
////
@ -47,15 +46,10 @@
//#define USE_32BIT_ATTACKS
#define USE_FOLDED_BITSCAN
#define BITCOUNT_SWAR_64
//#define BITCOUNT_SWAR_32
//#define BITCOUNT_LOOP
#else
#define USE_32BIT_ATTACKS
#define USE_FOLDED_BITSCAN
#define BITCOUNT_SWAR_32
#endif
@ -429,65 +423,6 @@ inline Bitboard isolated_pawn_mask(Square s) {
}
/// count_1s() counts the number of nonzero bits in a bitboard.
#if defined(BITCOUNT_LOOP)
inline int count_1s(Bitboard b) {
int r;
for(r = 0; b; r++, b &= b - 1);
return r;
}
inline int count_1s_max_15(Bitboard b) {
return count_1s(b);
}
#elif defined(BITCOUNT_SWAR_32)
inline int count_1s(Bitboard b) {
unsigned w = unsigned(b >> 32), v = unsigned(b);
v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits
w -= (w >> 1) & 0x55555555;
v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits
w = ((w >> 2) & 0x33333333) + (w & 0x33333333);
v = ((v >> 4) + v) & 0x0F0F0F0F; // 0-8 in 8 bits
v += (((w >> 4) + w) & 0x0F0F0F0F); // 0-16 in 8 bits
v *= 0x01010101; // mul is fast on amd procs
return int(v >> 24);
}
inline int count_1s_max_15(Bitboard b) {
unsigned w = unsigned(b >> 32), v = unsigned(b);
v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits
w -= (w >> 1) & 0x55555555;
v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits
w = ((w >> 2) & 0x33333333) + (w & 0x33333333);
v += w; // 0-8 in 4 bits
v *= 0x11111111;
return int(v >> 28);
}
#elif defined(BITCOUNT_SWAR_64)
inline int count_1s(Bitboard b) {
b -= ((b>>1) & 0x5555555555555555ULL);
b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL);
b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL;
b *= 0x0101010101010101ULL;
return int(b >> 56);
}
inline int count_1s_max_15(Bitboard b) {
b -= (b>>1) & 0x5555555555555555ULL;
b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL);
b *= 0x1111111111111111ULL;
return int(b >> 60);
}
#endif // BITCOUNT
////
//// Prototypes
////

186
src/bitcount.h Normal file
View file

@ -0,0 +1,186 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
Copyright (C) 2008-2009 Marco Costalba
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if !defined(BITCOUNT_H_INCLUDED)
#define BITCOUNT_H_INCLUDED
// To disable POPCNT support uncomment following line. You should do it only
// in PGO compiling to exercise the default fallback path. Don't forget to
// re-comment the line for the final optimized compile though ;-)
//#define DISABLE_POPCNT_SUPPORT
#include "bitboard.h"
// Select type of software bit count function to use
#if !defined(AUTO_CONFIGURATION) || defined(IS_64BIT)
//#define USE_COMPACT_ROOK_ATTACKS
//#define USE_32BIT_ATTACKS
#define USE_FOLDED_BITSCAN
#define BITCOUNT_SWAR_64
//#define BITCOUNT_SWAR_32
//#define BITCOUNT_LOOP
#else
#define USE_32BIT_ATTACKS
#define USE_FOLDED_BITSCAN
#define BITCOUNT_SWAR_32
#endif
// Select type of intrinsic bit count instruction to use
#if defined(_MSC_VER) && defined(_WIN64) // Microsoft compiler
#include <intrin.h>
inline bool cpu_has_popcnt() {
int CPUInfo[4] = {-1};
__cpuid(CPUInfo, 0x00000001);
return (CPUInfo[2] >> 23) & 1;
}
#define POPCNT_INTRINSIC(x) __popcnt64(x)
#elif defined(__INTEL_COMPILER) && (defined(__x86_64) || defined(_M_X64)) // Intel compiler
#include <nmmintrin.h>
inline bool cpu_has_popcnt() {
int CPUInfo[4] = {-1};
__cpuid(CPUInfo, 0x00000001);
return (CPUInfo[2] >> 23) & 1;
}
#define POPCNT_INTRINSIC(x) _mm_popcnt_u64(x)
#else // Safe fallback for unsupported compilers
inline bool cpu_has_popcnt() { return false; }
#define POPCNT_INTRINSIC(x) count_1s(x)
#endif
/// Software implementation of bit count functions
#if defined(BITCOUNT_LOOP)
inline int count_1s(Bitboard b) {
int r;
for(r = 0; b; r++, b &= b - 1);
return r;
}
inline int count_1s_max_15(Bitboard b) {
return count_1s(b);
}
#elif defined(BITCOUNT_SWAR_32)
inline int count_1s(Bitboard b) {
unsigned w = unsigned(b >> 32), v = unsigned(b);
v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits
w -= (w >> 1) & 0x55555555;
v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits
w = ((w >> 2) & 0x33333333) + (w & 0x33333333);
v = ((v >> 4) + v) & 0x0F0F0F0F; // 0-8 in 8 bits
v += (((w >> 4) + w) & 0x0F0F0F0F); // 0-16 in 8 bits
v *= 0x01010101; // mul is fast on amd procs
return int(v >> 24);
}
inline int count_1s_max_15(Bitboard b) {
unsigned w = unsigned(b >> 32), v = unsigned(b);
v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits
w -= (w >> 1) & 0x55555555;
v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits
w = ((w >> 2) & 0x33333333) + (w & 0x33333333);
v += w; // 0-8 in 4 bits
v *= 0x11111111;
return int(v >> 28);
}
#elif defined(BITCOUNT_SWAR_64)
inline int count_1s(Bitboard b) {
b -= ((b>>1) & 0x5555555555555555ULL);
b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL);
b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL;
b *= 0x0101010101010101ULL;
return int(b >> 56);
}
inline int count_1s_max_15(Bitboard b) {
b -= (b>>1) & 0x5555555555555555ULL;
b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL);
b *= 0x1111111111111111ULL;
return int(b >> 60);
}
#endif // BITCOUNT
/// count_1s() counts the number of nonzero bits in a bitboard.
/// If template parameter is true an intrinsic is called, otherwise
/// we fallback on a software implementation.
template<bool UseIntrinsic>
inline int count_1s(Bitboard b) {
return UseIntrinsic ? POPCNT_INTRINSIC(b) : count_1s(b);
}
template<bool UseIntrinsic>
inline int count_1s_max_15(Bitboard b) {
return UseIntrinsic ? POPCNT_INTRINSIC(b) : count_1s_max_15(b);
}
// Global variable initialized at startup that is set to true if
// CPU on which application runs supports POPCNT intrinsic. Unless
// DISABLE_POPCNT_SUPPORT is defined.
#if defined(DISABLE_POPCNT_SUPPORT)
const bool CpuHasPOPCNT = false;
#else
const bool CpuHasPOPCNT = cpu_has_popcnt();
#endif
// Global variable used to print info about the use of 64 optimized
// functions to verify that a 64bit compile has been correctly built.
#if defined(BITCOUNT_SWAR_64)
const bool CpuHas64BitPath = true;
#else
const bool CpuHas64BitPath = false;
#endif
#endif // !defined(BITCOUNT_H_INCLUDED)

View file

@ -25,6 +25,7 @@
#include <cassert>
#include "bitbase.h"
#include "bitcount.h"
#include "endgame.h"

View file

@ -25,6 +25,7 @@
#include <cassert>
#include <cstring>
#include "bitcount.h"
#include "evaluate.h"
#include "material.h"
#include "pawns.h"
@ -267,11 +268,14 @@ namespace {
uint8_t BitCount8Bit[256];
// Function prototypes
template<PieceType Piece>
template<bool HasPopCnt>
Value do_evaluate(const Position& pos, EvalInfo& ei, int threadID);
template<PieceType Piece, bool HasPopCnt>
void evaluate_pieces(const Position& p, Color us, EvalInfo& ei);
template<>
void evaluate_pieces<KING>(const Position& p, Color us, EvalInfo &ei);
void evaluate_pieces<KING, false>(const Position& p, Color us, EvalInfo &ei);
void evaluate_passed_pawns(const Position &pos, EvalInfo &ei);
void evaluate_trapped_bishop_a7h7(const Position &pos, Square s, Color us,
@ -294,11 +298,19 @@ namespace {
//// Functions
////
/// evaluate() is the main evaluation function. It always computes two
/// evaluate() is the main evaluation function. It always computes two
/// values, an endgame score and a middle game score, and interpolates
/// between them based on the remaining material.
Value evaluate(const Position& pos, EvalInfo& ei, int threadID) {
Value evaluate(const Position &pos, EvalInfo &ei, int threadID) {
return CpuHasPOPCNT ? do_evaluate<true>(pos, ei, threadID)
: do_evaluate<false>(pos, ei, threadID);
}
namespace {
template<bool HasPopCnt>
Value do_evaluate(const Position& pos, EvalInfo& ei, int threadID) {
assert(pos.is_ok());
assert(threadID >= 0 && threadID < THREAD_MAX);
@ -339,16 +351,16 @@ Value evaluate(const Position &pos, EvalInfo &ei, int threadID) {
// Initialize pawn attack bitboards for both sides
ei.attackedBy[WHITE][PAWN] = ((pos.pawns(WHITE) << 9) & ~FileABB) | ((pos.pawns(WHITE) << 7) & ~FileHBB);
ei.attackedBy[BLACK][PAWN] = ((pos.pawns(BLACK) >> 7) & ~FileABB) | ((pos.pawns(BLACK) >> 9) & ~FileHBB);
ei.kingAttackersCount[WHITE] = count_1s_max_15(ei.attackedBy[WHITE][PAWN] & ei.attackedBy[BLACK][KING])/2;
ei.kingAttackersCount[BLACK] = count_1s_max_15(ei.attackedBy[BLACK][PAWN] & ei.attackedBy[WHITE][KING])/2;
ei.kingAttackersCount[WHITE] = count_1s_max_15<HasPopCnt>(ei.attackedBy[WHITE][PAWN] & ei.attackedBy[BLACK][KING])/2;
ei.kingAttackersCount[BLACK] = count_1s_max_15<HasPopCnt>(ei.attackedBy[BLACK][PAWN] & ei.attackedBy[WHITE][KING])/2;
// Evaluate pieces
for (Color c = WHITE; c <= BLACK; c++)
{
evaluate_pieces<KNIGHT>(pos, c, ei);
evaluate_pieces<BISHOP>(pos, c, ei);
evaluate_pieces<ROOK>(pos, c, ei);
evaluate_pieces<QUEEN>(pos, c, ei);
evaluate_pieces<KNIGHT, HasPopCnt>(pos, c, ei);
evaluate_pieces<BISHOP, HasPopCnt>(pos, c, ei);
evaluate_pieces<ROOK, HasPopCnt>(pos, c, ei);
evaluate_pieces<QUEEN, HasPopCnt>(pos, c, ei);
// Sum up all attacked squares
ei.attackedBy[c][0] = ei.attackedBy[c][PAWN] | ei.attackedBy[c][KNIGHT]
@ -360,7 +372,7 @@ Value evaluate(const Position &pos, EvalInfo &ei, int threadID) {
// because we need complete attack information for all pieces when computing
// the king safety evaluation.
for (Color c = WHITE; c <= BLACK; c++)
evaluate_pieces<KING>(pos, c, ei);
evaluate_pieces<KING, false>(pos, c, ei);
// Evaluate passed pawns. We evaluate passed pawns for both sides at once,
// because we need to know which side promotes first in positions where
@ -436,6 +448,7 @@ Value evaluate(const Position &pos, EvalInfo &ei, int threadID) {
return (ei.mateThreat[stm] == MOVE_NONE ? v : 8 * QueenValueMidgame - v);
}
} // namespace
/// quick_evaluate() does a very approximate evaluation of the current position.
/// It currently considers only material and piece square table scores. Perhaps
@ -527,7 +540,7 @@ namespace {
// evaluate_common() computes terms common to all pieces attack
template<PieceType Piece>
template<PieceType Piece, bool HasPopCnt>
int evaluate_common(const Position& p, const Bitboard& b, Color us, EvalInfo& ei, Square s = SQ_NONE) {
static const int AttackWeight[] = { 0, 0, KnightAttackWeight, BishopAttackWeight, RookAttackWeight, QueenAttackWeight };
@ -547,15 +560,15 @@ namespace {
ei.kingAttackersWeight[us] += AttackWeight[Piece];
Bitboard bb = (b & ei.attackedBy[them][KING]);
if (bb)
ei.kingAdjacentZoneAttacksCount[us] += count_1s_max_15(bb);
ei.kingAdjacentZoneAttacksCount[us] += count_1s_max_15<HasPopCnt>(bb);
}
// Remove squares protected by enemy pawns
Bitboard bb = (b & ~ei.attackedBy[them][PAWN]);
// Mobility
int mob = (Piece != QUEEN ? count_1s_max_15(bb & ~p.pieces_of_color(us))
: count_1s(bb & ~p.pieces_of_color(us)));
int mob = (Piece != QUEEN ? count_1s_max_15<HasPopCnt>(bb & ~p.pieces_of_color(us))
: count_1s<HasPopCnt>(bb & ~p.pieces_of_color(us)));
ei.mgMobility += Sign[us] * MgBonus[Piece][mob];
ei.egMobility += Sign[us] * EgBonus[Piece][mob];
@ -587,7 +600,7 @@ namespace {
// evaluate_pieces<>() assigns bonuses and penalties to the pieces of a given
// color.
template<PieceType Piece>
template<PieceType Piece, bool HasPopCnt>
void evaluate_pieces(const Position& pos, Color us, EvalInfo& ei) {
Bitboard b;
@ -608,7 +621,7 @@ namespace {
b = rook_attacks_bb(s, pos.occupied_squares() & ~pos.rooks_and_queens(us));
// Attacks, mobility and outposts
mob = evaluate_common<Piece>(pos, b, us, ei, s);
mob = evaluate_common<Piece, HasPopCnt>(pos, b, us, ei, s);
// Special patterns: trapped bishops on a7/h7/a2/h2
// and trapped bishops on a1/h1/a8/h8 in Chess960.
@ -691,7 +704,7 @@ namespace {
// color.
template<>
void evaluate_pieces<KING>(const Position& p, Color us, EvalInfo& ei) {
void evaluate_pieces<KING, false>(const Position& p, Color us, EvalInfo& ei) {
int shelter = 0, sign = Sign[us];
Square s = p.king_square(us);

View file

@ -29,6 +29,7 @@
#include <string>
#include "benchmark.h"
#include "bitcount.h"
#include "misc.h"
#include "uci.h"
@ -74,9 +75,12 @@ int main(int argc, char *argv[]) {
}
// Print copyright notice
cout << engine_name() << ". Copyright (C) "
cout << engine_name() << ". Copyright (C) "
<< "2004-2009 Tord Romstad, Marco Costalba. " << endl;
if (CpuHasPOPCNT)
cout << "Good! CPU has hardware POPCNT. We will use it." << endl;
// Enter UCI mode
uci_main_loop();
return 0;

View file

@ -65,6 +65,7 @@ static int gettimeofday(struct timeval* tp, struct timezone*)
#include <iostream>
#include <sstream>
#include "bitcount.h"
#include "misc.h"
using namespace std;
@ -162,8 +163,10 @@ void dbg_print_mean(ofstream& logFile) {
const string engine_name() {
const string cpu64(CpuHas64BitPath ? " 64bit" : "");
if (!EngineVersion.empty())
return "Stockfish " + EngineVersion;
return AppName+ " " + EngineVersion + cpu64;
string date(__DATE__); // From compiler, format is "Sep 21 2008"
string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
@ -176,7 +179,7 @@ const string engine_name() {
string name = AppName + " " + AppTag + " ";
s << name << date.substr(date.length() - 2) << setfill('0')
<< setw(2) << mon << setw(2) << day;
<< setw(2) << mon << setw(2) << day << cpu64;
return s.str();
}

View file

@ -24,6 +24,7 @@
#include <cassert>
#include "bitcount.h"
#include "movegen.h"
// Simple macro to wrap a very common while loop, no facny, no flexibility,

View file

@ -25,6 +25,7 @@
#include <cassert>
#include <cstring>
#include "bitcount.h"
#include "pawns.h"
#include "position.h"

View file

@ -27,6 +27,7 @@
#include <fstream>
#include <iostream>
#include "bitcount.h"
#include "mersenne.h"
#include "movegen.h"
#include "movepick.h"