mirror of
https://github.com/sockspls/badfish
synced 2025-05-01 09:13:08 +00:00
Add TT prefetching support
TT.retrieve() is the most time-consuming function because it almost always involves a very slow RAM access: the TT is so big that it is never cached. This patch prefetches TT data just after a move is made, so that the subsequent TT.retrieve() call will be very fast. Profiling with VTune shows that TT.retrieve() times are almost cut in half! No functional change. Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
parent
e6863f46de
commit
cd4604b05c
3 changed files with 18 additions and 0 deletions
|
@ -1126,6 +1126,7 @@ namespace {
|
||||||
// Make and search the move
|
// Make and search the move
|
||||||
StateInfo st;
|
StateInfo st;
|
||||||
pos.do_move(move, st, dcCandidates);
|
pos.do_move(move, st, dcCandidates);
|
||||||
|
TT.prefetch(pos.get_key());
|
||||||
|
|
||||||
if (moveCount == 1) // The first move in list is the PV
|
if (moveCount == 1) // The first move in list is the PV
|
||||||
value = -search_pv(pos, ss, -beta, -alpha, newDepth, ply+1, threadID);
|
value = -search_pv(pos, ss, -beta, -alpha, newDepth, ply+1, threadID);
|
||||||
|
@ -1296,6 +1297,8 @@ namespace {
|
||||||
|
|
||||||
StateInfo st;
|
StateInfo st;
|
||||||
pos.do_null_move(st);
|
pos.do_null_move(st);
|
||||||
|
TT.prefetch(pos.get_key());
|
||||||
|
|
||||||
int R = (depth >= 5 * OnePly ? 4 : 3); // Null move dynamic reduction
|
int R = (depth >= 5 * OnePly ? 4 : 3); // Null move dynamic reduction
|
||||||
|
|
||||||
Value nullValue = -search(pos, ss, -(beta-1), depth-R*OnePly, ply+1, false, threadID);
|
Value nullValue = -search(pos, ss, -(beta-1), depth-R*OnePly, ply+1, false, threadID);
|
||||||
|
@ -1410,6 +1413,7 @@ namespace {
|
||||||
// Make and search the move
|
// Make and search the move
|
||||||
StateInfo st;
|
StateInfo st;
|
||||||
pos.do_move(move, st, dcCandidates);
|
pos.do_move(move, st, dcCandidates);
|
||||||
|
TT.prefetch(pos.get_key());
|
||||||
|
|
||||||
// Try to reduce non-pv search depth by one ply if move seems not problematic,
|
// Try to reduce non-pv search depth by one ply if move seems not problematic,
|
||||||
// if the move fails high will be re-searched at full depth.
|
// if the move fails high will be re-searched at full depth.
|
||||||
|
@ -1619,6 +1623,7 @@ namespace {
|
||||||
// Make and search the move.
|
// Make and search the move.
|
||||||
StateInfo st;
|
StateInfo st;
|
||||||
pos.do_move(move, st, dcCandidates);
|
pos.do_move(move, st, dcCandidates);
|
||||||
|
TT.prefetch(pos.get_key());
|
||||||
Value value = -qsearch(pos, ss, -beta, -alpha, depth-OnePly, ply+1, threadID);
|
Value value = -qsearch(pos, ss, -beta, -alpha, depth-OnePly, ply+1, threadID);
|
||||||
pos.undo_move(move);
|
pos.undo_move(move);
|
||||||
|
|
||||||
|
|
12
src/tt.cpp
12
src/tt.cpp
|
@ -25,6 +25,7 @@
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
|
||||||
#include "movegen.h"
|
#include "movegen.h"
|
||||||
#include "tt.h"
|
#include "tt.h"
|
||||||
|
@ -153,6 +154,17 @@ TTEntry* TranspositionTable::retrieve(const Key posKey) const {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// TranspositionTable::prefetch looks up the current position in the
|
||||||
|
/// transposition table and loads it into L1/L2 cache. This is a non-
|
||||||
|
/// blocking function and does not stall the CPU waiting for data
|
||||||
|
/// to be loaded from RAM, that can be very slow. When we will
|
||||||
|
/// subsequently call retrieve() the TT data will be already
|
||||||
|
/// quickly accessible in L1/L2 CPU cache.
|
||||||
|
|
||||||
|
void TranspositionTable::prefetch(const Key posKey) const {
|
||||||
|
|
||||||
|
_mm_prefetch((char*)first_entry(posKey), _MM_HINT_T0);
|
||||||
|
}
|
||||||
|
|
||||||
/// TranspositionTable::first_entry returns a pointer to the first
|
/// TranspositionTable::first_entry returns a pointer to the first
|
||||||
/// entry of a cluster given a position. The low 32 bits of the key
|
/// entry of a cluster given a position. The low 32 bits of the key
|
||||||
|
|
1
src/tt.h
1
src/tt.h
|
@ -85,6 +85,7 @@ public:
|
||||||
void clear();
|
void clear();
|
||||||
void store(const Key posKey, Value v, ValueType type, Depth d, Move m);
|
void store(const Key posKey, Value v, ValueType type, Depth d, Move m);
|
||||||
TTEntry* retrieve(const Key posKey) const;
|
TTEntry* retrieve(const Key posKey) const;
|
||||||
|
void prefetch(const Key posKey) const;
|
||||||
void new_search();
|
void new_search();
|
||||||
void insert_pv(const Position& pos, Move pv[]);
|
void insert_pv(const Position& pos, Move pv[]);
|
||||||
void extract_pv(const Position& pos, Move pv[]);
|
void extract_pv(const Position& pos, Move pv[]);
|
||||||
|
|
Loading…
Add table
Reference in a new issue