mirror of
https://github.com/sockspls/badfish
synced 2025-04-30 16:53:09 +00:00
Use 128 bit multiply for TT index
Remove the super-cluster logic from the TT and compute the cluster index with a single 128-bit multiply.

STC: https://tests.stockfishchess.org/tests/view/5ee719b3aae8aec816ab7548
LLR: 2.94 (-2.94,2.94) {-1.50,0.50}
Total: 12736 W: 2502 L: 2333 D: 7901
Ptnml(0-2): 191, 1452, 2944, 1559, 222

LTC: https://tests.stockfishchess.org/tests/view/5ee732d1aae8aec816ab7556
LLR: 2.93 (-2.94,2.94) {-1.50,0.50}
Total: 27584 W: 3431 L: 3350 D: 20803
Ptnml(0-2): 173, 2500, 8400, 2511, 208

The scheme is back to being derived from https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/

Also, the default optimized version of the index calculation now uses fewer instructions: https://godbolt.org/z/Tktxbv
It might benefit from mulx (requires -mbmi2).

closes https://github.com/official-stockfish/Stockfish/pull/2744

bench: 4320954
This commit is contained in:
parent
995ee4b311
commit
1ea488d34c
5 changed files with 22 additions and 22 deletions
13
src/misc.h
13
src/misc.h
|
@ -110,6 +110,19 @@ public:
|
||||||
{ return T(rand64() & rand64() & rand64()); }
|
{ return T(rand64() & rand64() & rand64()); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// mul_hi64() returns the upper 64 bits of the full 128-bit product a * b,
/// that is floor(a * b / 2^64). When b is a table size, the result is an
/// index in [0, b) obtained without a division or modulo.
inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
#if defined(__GNUC__) && defined(IS_64BIT)
    // GCC-compatible 64-bit builds: let the compiler do the widening multiply
    // through its native 128-bit integer type.
    __extension__ typedef unsigned __int128 uint128;
    return ((uint128)a * (uint128)b) >> 64;
#else
    // Portable fallback: schoolbook multiplication on 32-bit halves.
    uint64_t aL = (uint32_t)a, aH = a >> 32;
    uint64_t bL = (uint32_t)b, bH = b >> 32;

    // Carry out of the low x low partial product
    uint64_t c1 = (aL * bL) >> 32;

    // Each sum below fits in 64 bits: (2^32 - 1)^2 + (2^32 - 1) < 2^64
    uint64_t c2 = aH * bL + c1;
    uint64_t c3 = aL * bH + (uint32_t)c2;

    // High x high product plus the carries of both cross terms
    return aH * bH + (c2 >> 32) + (c3 >> 32);
#endif
}
|
||||||
|
|
||||||
/// Under Windows it is not possible for a process to run on more than one
|
/// Under Windows it is not possible for a process to run on more than one
|
||||||
/// logical processor group. This usually means to be limited to use max 64
|
/// logical processor group. This usually means to be limited to use max 64
|
||||||
|
|
|
@ -662,7 +662,7 @@ namespace {
|
||||||
// search to overwrite a previous full search TT value, so we use a different
|
// search to overwrite a previous full search TT value, so we use a different
|
||||||
// position key in case of an excluded move.
|
// position key in case of an excluded move.
|
||||||
excludedMove = ss->excludedMove;
|
excludedMove = ss->excludedMove;
|
||||||
posKey = pos.key() ^ Key(excludedMove << 16); // Isn't a very good hash
|
posKey = pos.key() ^ (Key(excludedMove) << 48); // Isn't a very good hash
|
||||||
tte = TT.probe(posKey, ttHit);
|
tte = TT.probe(posKey, ttHit);
|
||||||
ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
|
ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
|
||||||
ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0]
|
ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0]
|
||||||
|
|
16
src/tt.cpp
16
src/tt.cpp
|
@ -36,17 +36,17 @@ TranspositionTable TT; // Our global transposition table
|
||||||
void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {
|
void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {
|
||||||
|
|
||||||
// Preserve any existing move for the same position
|
// Preserve any existing move for the same position
|
||||||
if (m || (k >> 48) != key16)
|
if (m || (uint16_t)k != key16)
|
||||||
move16 = (uint16_t)m;
|
move16 = (uint16_t)m;
|
||||||
|
|
||||||
// Overwrite less valuable entries
|
// Overwrite less valuable entries
|
||||||
if ( (k >> 48) != key16
|
if ((uint16_t)k != key16
|
||||||
|| d - DEPTH_OFFSET > depth8 - 4
|
|| d - DEPTH_OFFSET > depth8 - 4
|
||||||
|| b == BOUND_EXACT)
|
|| b == BOUND_EXACT)
|
||||||
{
|
{
|
||||||
assert(d >= DEPTH_OFFSET);
|
assert(d >= DEPTH_OFFSET);
|
||||||
|
|
||||||
key16 = (uint16_t)(k >> 48);
|
key16 = (uint16_t)k;
|
||||||
value16 = (int16_t)v;
|
value16 = (int16_t)v;
|
||||||
eval16 = (int16_t)ev;
|
eval16 = (int16_t)ev;
|
||||||
genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
|
genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
|
||||||
|
@ -65,10 +65,8 @@ void TranspositionTable::resize(size_t mbSize) {
|
||||||
|
|
||||||
aligned_ttmem_free(mem);
|
aligned_ttmem_free(mem);
|
||||||
|
|
||||||
superClusterCount = mbSize * 1024 * 1024 / (sizeof(Cluster) * ClustersPerSuperCluster);
|
clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
|
||||||
|
table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem));
|
||||||
table = static_cast<Cluster*>(
|
|
||||||
aligned_ttmem_alloc(superClusterCount * ClustersPerSuperCluster * sizeof(Cluster), mem));
|
|
||||||
if (!mem)
|
if (!mem)
|
||||||
{
|
{
|
||||||
std::cerr << "Failed to allocate " << mbSize
|
std::cerr << "Failed to allocate " << mbSize
|
||||||
|
@ -91,8 +89,6 @@ void TranspositionTable::clear() {
|
||||||
{
|
{
|
||||||
threads.emplace_back([this, idx]() {
|
threads.emplace_back([this, idx]() {
|
||||||
|
|
||||||
const size_t clusterCount = superClusterCount * ClustersPerSuperCluster;
|
|
||||||
|
|
||||||
// Thread binding gives faster search on systems with a first-touch policy
|
// Thread binding gives faster search on systems with a first-touch policy
|
||||||
if (Options["Threads"] > 8)
|
if (Options["Threads"] > 8)
|
||||||
WinProcGroup::bindThisThread(idx);
|
WinProcGroup::bindThisThread(idx);
|
||||||
|
@ -121,7 +117,7 @@ void TranspositionTable::clear() {
|
||||||
TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
|
TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
|
||||||
|
|
||||||
TTEntry* const tte = first_entry(key);
|
TTEntry* const tte = first_entry(key);
|
||||||
const uint16_t key16 = key >> 48; // Use the high 16 bits as key inside the cluster
|
const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster
|
||||||
|
|
||||||
for (int i = 0; i < ClusterSize; ++i)
|
for (int i = 0; i < ClusterSize; ++i)
|
||||||
if (!tte[i].key16 || tte[i].key16 == key16)
|
if (!tte[i].key16 || tte[i].key16 == key16)
|
||||||
|
|
12
src/tt.h
12
src/tt.h
|
@ -66,7 +66,6 @@ private:
|
||||||
class TranspositionTable {
|
class TranspositionTable {
|
||||||
|
|
||||||
static constexpr int ClusterSize = 3;
|
static constexpr int ClusterSize = 3;
|
||||||
static constexpr int ClustersPerSuperCluster = 256;
|
|
||||||
|
|
||||||
struct Cluster {
|
struct Cluster {
|
||||||
TTEntry entry[ClusterSize];
|
TTEntry entry[ClusterSize];
|
||||||
|
@ -84,20 +83,13 @@ public:
|
||||||
void clear();
|
void clear();
|
||||||
|
|
||||||
TTEntry* first_entry(const Key key) const {
|
TTEntry* first_entry(const Key key) const {
|
||||||
|
// New scheme: the cluster index is the high 64 bits of key * clusterCount,
// i.e. floor(key * clusterCount / 2^64), which is always below clusterCount.
return &table[mul_hi64(key, clusterCount)].entry[0];
|
||||||
// The index is computed from
|
|
||||||
// Idx = (K48 * SCC) / 2^40, with K48 the 48 lowest bits swizzled.
|
|
||||||
|
|
||||||
const uint64_t firstTerm = uint32_t(key) * uint64_t(superClusterCount);
|
|
||||||
const uint64_t secondTerm = (uint16_t(key >> 32) * uint64_t(superClusterCount)) >> 16;
|
|
||||||
|
|
||||||
return &table[(firstTerm + secondTerm) >> 24].entry[0];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend struct TTEntry;
|
friend struct TTEntry;
|
||||||
|
|
||||||
size_t superClusterCount;
|
size_t clusterCount;
|
||||||
Cluster* table;
|
Cluster* table;
|
||||||
void* mem;
|
void* mem;
|
||||||
uint8_t generation8; // Size must be not bigger than TTEntry::genBound8
|
uint8_t generation8; // Size must be not bigger than TTEntry::genBound8
|
||||||
|
|
|
@ -56,7 +56,6 @@ bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const
|
||||||
|
|
||||||
void init(OptionsMap& o) {
|
void init(OptionsMap& o) {
|
||||||
|
|
||||||
// At most 2^32 superclusters. Supercluster = 8 kB
|
|
||||||
constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;
|
constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;
|
||||||
|
|
||||||
o["Debug Log File"] << Option("", on_logger);
|
o["Debug Log File"] << Option("", on_logger);
|
||||||
|
|
Loading…
Add table
Reference in a new issue