1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-30 08:43:09 +00:00

Add TT prefetching support

TT.retrieve() is the most time-consuming function
because it almost always involves a very slow RAM access.

The TT is so big that it is never cached. This patch
prefetches TT data just after a move is done, so that the
subsequent TT.retrieve() will be very fast.

Profiling with VTune shows that TT.retrieve() times are
almost cut in half!

No functional change.

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
Marco Costalba 2009-08-09 13:44:55 +01:00
parent e6863f46de
commit cd4604b05c
3 changed files with 18 additions and 0 deletions

View file

@ -1126,6 +1126,7 @@ namespace {
// Make and search the move
StateInfo st;
pos.do_move(move, st, dcCandidates);
TT.prefetch(pos.get_key());
if (moveCount == 1) // The first move in list is the PV
value = -search_pv(pos, ss, -beta, -alpha, newDepth, ply+1, threadID);
@ -1296,6 +1297,8 @@ namespace {
StateInfo st;
pos.do_null_move(st);
TT.prefetch(pos.get_key());
int R = (depth >= 5 * OnePly ? 4 : 3); // Null move dynamic reduction
Value nullValue = -search(pos, ss, -(beta-1), depth-R*OnePly, ply+1, false, threadID);
@ -1410,6 +1413,7 @@ namespace {
// Make and search the move
StateInfo st;
pos.do_move(move, st, dcCandidates);
TT.prefetch(pos.get_key());
// Try to reduce non-pv search depth by one ply if move seems not problematic,
// if the move fails high will be re-searched at full depth.
@ -1619,6 +1623,7 @@ namespace {
// Make and search the move.
StateInfo st;
pos.do_move(move, st, dcCandidates);
TT.prefetch(pos.get_key());
Value value = -qsearch(pos, ss, -beta, -alpha, depth-OnePly, ply+1, threadID);
pos.undo_move(move);

View file

@ -25,6 +25,7 @@
#include <cassert>
#include <cmath>
#include <cstring>
#include <xmmintrin.h>
#include "movegen.h"
#include "tt.h"
@ -153,6 +154,17 @@ TTEntry* TranspositionTable::retrieve(const Key posKey) const {
return NULL;
}
/// TranspositionTable::prefetch issues a cache prefetch hint for the
/// cluster that belongs to the given position key. The address comes
/// from first_entry() and is handed to _mm_prefetch() with the
/// _MM_HINT_T0 locality hint. The call is meant to be non-blocking:
/// it does not wait for the data to arrive from RAM, it only asks the
/// CPU to start fetching it, so that a later retrieve() for the same
/// key can find the entry already sitting in the L1/L2 cache.
void TranspositionTable::prefetch(const Key posKey) const {
  const char* const clusterAddr = reinterpret_cast<const char*>(first_entry(posKey));
  _mm_prefetch(clusterAddr, _MM_HINT_T0);
}
/// TranspositionTable::first_entry returns a pointer to the first
/// entry of a cluster given a position. The low 32 bits of the key

View file

@ -85,6 +85,7 @@ public:
void clear();
void store(const Key posKey, Value v, ValueType type, Depth d, Move m);
TTEntry* retrieve(const Key posKey) const;
void prefetch(const Key posKey) const;
void new_search();
void insert_pv(const Position& pos, Move pv[]);
void extract_pv(const Position& pos, Move pv[]);