1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-07-11 11:39:15 +00:00

Finally fix prefetch on Linux

It was due to a missing -msse compiler option!

Without this option the CPU silently discards
prefetcht2 instructions during execution.

Also added a (gcc-documented) hack to prevent the Intel
compiler from optimizing away the prefetches.

Special thanks to Heinz for testing and suggesting
improvements, and to Jim for testing icc on Windows.

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
Marco Costalba 2009-08-12 09:40:03 +02:00
parent 166c09a7a0
commit fd12e8cb23
2 changed files with 12 additions and 16 deletions

View file

@ -26,8 +26,8 @@ EXE = stockfish
### Compiler speed switches for both GCC and ICC. These settings are generally ### Compiler speed switches for both GCC and ICC. These settings are generally
### fast on a broad range of systems, but may be changed experimentally ### fast on a broad range of systems, but may be changed experimentally
### ========================================================================== ### ==========================================================================
GCCFLAGS = -O3 GCCFLAGS = -O3 -msse
ICCFLAGS = -fast ICCFLAGS = -fast -msse
### ========================================================================== ### ==========================================================================
@ -169,6 +169,6 @@ $(EXE): $(OBJS)
### Dependencies. Do not change ### Dependencies. Do not change
.depend: .depend:
$(CXX) -MM $(OBJS:.o=.cpp) > $@ $(CXX) -msse -MM $(OBJS:.o=.cpp) > $@
include .depend include .depend

View file

@ -25,14 +25,11 @@
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <xmmintrin.h>
#include "movegen.h" #include "movegen.h"
#include "tt.h" #include "tt.h"
#if defined(_MSC_VER)
#include <xmmintrin.h>
#endif
// The main transposition table // The main transposition table
TranspositionTable TT; TranspositionTable TT;
@ -175,16 +172,15 @@ TTEntry* TranspositionTable::retrieve(const Key posKey) const {
void TranspositionTable::prefetch(const Key posKey) const { void TranspositionTable::prefetch(const Key posKey) const {
#if defined(_MSC_VER) #if defined(__INTEL_COMPILER) || defined(__ICL)
char* addr = (char*)first_entry(posKey); // This hack prevents prefetches to be optimized away by the
_mm_prefetch(addr, _MM_HINT_T0); // Intel compiler. Both MSVC and gcc seems not affected.
_mm_prefetch(addr+64, _MM_HINT_T0); __asm__ ("");
#else
// We need to force an asm volatile here because gcc builtin
// is optimized away by Intel compiler.
char* addr = (char*)first_entry(posKey);
asm volatile("prefetcht0 %0" :: "m" (addr));
#endif #endif
char const* addr = (char*)first_entry(posKey);
_mm_prefetch(addr, _MM_HINT_T2);
_mm_prefetch(addr+64, _MM_HINT_T2); // 64 bytes ahead
} }