mirror of
https://github.com/sockspls/badfish
synced 2025-04-30 08:43:09 +00:00
Add TT prefetching support
TT.retrieve() is the most time-consuming function because it almost always involves a very slow RAM access. The TT table is so big that it is never cached. This patch prefetches TT data just after a move is made, so that the subsequent TT.retrieve() will be very fast. Profiling with VTune shows that TT.retrieve() times are almost cut in half! No functional change. Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
parent
e6863f46de
commit
cd4604b05c
3 changed files with 18 additions and 0 deletions
|
@ -1126,6 +1126,7 @@ namespace {
|
|||
// Make and search the move
|
||||
StateInfo st;
|
||||
pos.do_move(move, st, dcCandidates);
|
||||
TT.prefetch(pos.get_key());
|
||||
|
||||
if (moveCount == 1) // The first move in list is the PV
|
||||
value = -search_pv(pos, ss, -beta, -alpha, newDepth, ply+1, threadID);
|
||||
|
@ -1296,6 +1297,8 @@ namespace {
|
|||
|
||||
StateInfo st;
|
||||
pos.do_null_move(st);
|
||||
TT.prefetch(pos.get_key());
|
||||
|
||||
int R = (depth >= 5 * OnePly ? 4 : 3); // Null move dynamic reduction
|
||||
|
||||
Value nullValue = -search(pos, ss, -(beta-1), depth-R*OnePly, ply+1, false, threadID);
|
||||
|
@ -1410,6 +1413,7 @@ namespace {
|
|||
// Make and search the move
|
||||
StateInfo st;
|
||||
pos.do_move(move, st, dcCandidates);
|
||||
TT.prefetch(pos.get_key());
|
||||
|
||||
// Try to reduce non-pv search depth by one ply if move seems not problematic,
|
||||
// if the move fails high will be re-searched at full depth.
|
||||
|
@ -1619,6 +1623,7 @@ namespace {
|
|||
// Make and search the move.
|
||||
StateInfo st;
|
||||
pos.do_move(move, st, dcCandidates);
|
||||
TT.prefetch(pos.get_key());
|
||||
Value value = -qsearch(pos, ss, -beta, -alpha, depth-OnePly, ply+1, threadID);
|
||||
pos.undo_move(move);
|
||||
|
||||
|
|
12
src/tt.cpp
12
src/tt.cpp
|
@ -25,6 +25,7 @@
|
|||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include "movegen.h"
|
||||
#include "tt.h"
|
||||
|
@ -153,6 +154,17 @@ TTEntry* TranspositionTable::retrieve(const Key posKey) const {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/// TranspositionTable::prefetch looks up the current position in the
|
||||
/// transposition table and loads it into the L1/L2 cache. This is a
|
||||
/// non-blocking function and does not stall the CPU waiting for data
|
||||
/// to be loaded from RAM, which can be very slow. When we
|
||||
/// subsequently call retrieve() the TT data will already be
|
||||
/// quickly accessible in the L1/L2 CPU cache.
|
||||
|
||||
void TranspositionTable::prefetch(const Key posKey) const {
|
||||
|
||||
_mm_prefetch((char*)first_entry(posKey), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
/// TranspositionTable::first_entry returns a pointer to the first
|
||||
/// entry of a cluster given a position. The low 32 bits of the key
|
||||
|
|
1
src/tt.h
1
src/tt.h
|
@ -85,6 +85,7 @@ public:
|
|||
void clear();
|
||||
void store(const Key posKey, Value v, ValueType type, Depth d, Move m);
|
||||
TTEntry* retrieve(const Key posKey) const;
|
||||
void prefetch(const Key posKey) const;
|
||||
void new_search();
|
||||
void insert_pv(const Position& pos, Move pv[]);
|
||||
void extract_pv(const Position& pos, Move pv[]);
|
||||
|
|
Loading…
Add table
Reference in a new issue