1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-30 16:53:09 +00:00

Use thread_local compiler specifics

Much faster than pthread_getspecific() but still a
speed regression against the original code.

Following are the nps on a bench:

Position
454165
454838
455433

tls
441046
442767
442767

ms (Win)
450521
447510
451105

ms (pthread)
422115
422115
424276

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
This commit is contained in:
Marco Costalba 2012-04-06 17:01:41 +01:00
parent bed4075580
commit b1f57e92ce
7 changed files with 17 additions and 24 deletions

View file

@ -371,7 +371,7 @@ Value do_evaluate(const Position& pos, Value& margin) {
margins[WHITE] = margins[BLACK] = VALUE_ZERO;
// Probe the material hash table
ei.mi = Threads.this_thread()->materialTable.probe(pos);
ei.mi = this_thread->materialTable.probe(pos);
score += ei.mi->material_value();
// If we have a specialized evaluation function for the current material
@ -383,7 +383,7 @@ Value do_evaluate(const Position& pos, Value& margin) {
}
// Probe the pawn hash table
ei.pi = Threads.this_thread()->pawnTable.probe(pos);
ei.pi = this_thread->pawnTable.probe(pos);
score += ei.pi->pawns_value();
// Initialize attack and king safety bitboards

View file

@ -54,7 +54,6 @@ inline uint64_t time_to_msec(const sys_time_t& t) { return t.tv_sec * 1000LL + t
typedef pthread_mutex_t Lock;
typedef pthread_cond_t WaitCondition;
typedef pthread_t NativeHandle;
typedef pthread_key_t ThreadLocalStorageKey;
typedef void*(*pt_start_fn)(void*);
# define lock_init(x) pthread_mutex_init(&(x), NULL)
@ -68,10 +67,6 @@ typedef void*(*pt_start_fn)(void*);
# define cond_timedwait(x,y,z) pthread_cond_timedwait(&(x),&(y),z)
# define thread_create(x,f,t) !pthread_create(&(x),NULL,(pt_start_fn)f,t)
# define thread_join(x) pthread_join(x, NULL)
# define tls_init(k) pthread_key_create(&k,NULL)
# define tls_get(k) pthread_getspecific(k)
# define tls_set(k,x) pthread_setspecific(k,x)
# define tls_destroy(k) pthread_key_delete(k)
#else // Windows and MinGW
@ -96,7 +91,6 @@ inline uint64_t time_to_msec(const sys_time_t& t) { return t.time * 1000LL + t.m
typedef CRITICAL_SECTION Lock;
typedef HANDLE WaitCondition;
typedef HANDLE NativeHandle;
typedef DWORD ThreadLocalStorageKey;
# define lock_init(x) InitializeCriticalSection(&(x))
# define lock_grab(x) EnterCriticalSection(&(x))
@ -109,10 +103,6 @@ typedef DWORD ThreadLocalStorageKey;
# define cond_timedwait(x,y,z) { lock_release(y); WaitForSingleObject(x,z); lock_grab(y); }
# define thread_create(x,f,t) (x = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)f,t,0,NULL), x != NULL)
# define thread_join(x) { WaitForSingleObject(x, INFINITE); CloseHandle(x); }
# define tls_init(k) do { k = TlsAlloc(); } while(0)
# define tls_get(k) TlsGetValue(k)
# define tls_set(k,x) TlsSetValue(k,x)
# define tls_destroy(k) TlsFree(k)
#endif

View file

@ -895,8 +895,8 @@ void Position::do_move(Move m, StateInfo& newSt, const CheckInfo& ci, bool moveI
}
// Prefetch pawn and material hash tables
prefetch((char*)Threads.this_thread()->pawnTable.entries[st->pawnKey]);
prefetch((char*)Threads.this_thread()->materialTable.entries[st->materialKey]);
prefetch((char*)this_thread->pawnTable.entries[st->pawnKey]);
prefetch((char*)this_thread->materialTable.entries[st->materialKey]);
// Update incremental scores
st->psqScore += psq_delta(piece, from, to);

View file

@ -332,7 +332,7 @@ finalize:
// but if we are pondering or in infinite search, we shouldn't print the best
// move before we are told to do so.
if (!Signals.stop && (Limits.ponder || Limits.infinite))
Threads.this_thread()->wait_for_stop_or_ponderhit();
this_thread->wait_for_stop_or_ponderhit();
// Best move could be MOVE_NONE when searching on a stalemate position
cout << "bestmove " << move_to_uci(RootMoves[0].pv[0], Chess960)
@ -543,7 +543,7 @@ namespace {
bool isPvMove, inCheck, singularExtensionNode, givesCheck;
bool captureOrPromotion, dangerous, doFullDepthSearch;
int moveCount = 0, playedMoveCount = 0;
Thread* thisThread = Threads.this_thread();
Thread* thisThread = this_thread;
SplitPoint* sp = NULL;
refinedValue = bestValue = value = -VALUE_INFINITE;

View file

@ -28,6 +28,7 @@
using namespace Search;
ThreadsManager Threads; // Global object
THREAD_LOCAL Thread* this_thread; // Thread local variable
namespace { extern "C" {
@ -36,7 +37,7 @@ namespace { extern "C" {
long start_routine(Thread* th) {
Threads.set_this_thread(th); // Save pointer into thread local storage
this_thread = th; // Save pointer into thread local storage
(th->*(th->start_fn))();
return 0;
}
@ -205,12 +206,11 @@ bool Thread::is_available_to(Thread* master) const {
void ThreadsManager::init() {
tls_init(tlsKey);
cond_init(sleepCond);
lock_init(splitLock);
timer = new Thread(&Thread::timer_loop);
threads.push_back(new Thread(&Thread::main_loop));
set_this_thread(main_thread()); // Use main thread's resources
this_thread = main_thread(); // Use main thread's resources
read_uci_options();
}
@ -225,7 +225,6 @@ ThreadsManager::~ThreadsManager() {
delete timer;
lock_destroy(splitLock);
cond_destroy(sleepCond);
tls_destroy(tlsKey);
}
@ -314,7 +313,7 @@ Value ThreadsManager::split(Position& pos, Stack* ss, Value alpha, Value beta,
assert(beta <= VALUE_INFINITE);
assert(depth > DEPTH_ZERO);
Thread* master = this_thread();
Thread* master = this_thread;
if (master->splitPointsCnt >= MAX_SPLITPOINTS_PER_THREAD)
return bestValue;

View file

@ -120,8 +120,6 @@ public:
int min_split_depth() const { return minimumSplitDepth; }
int size() const { return (int)threads.size(); }
Thread* main_thread() const { return threads[0]; }
Thread* this_thread() const { return (Thread*)tls_get(tlsKey); }
void set_this_thread(Thread* th) const { tls_set(tlsKey, th); }
void wake_up() const;
void sleep() const;
@ -140,7 +138,6 @@ private:
std::vector<Thread*> threads;
Thread* timer;
ThreadLocalStorageKey tlsKey;
Lock splitLock;
WaitCondition sleepCond;
Depth minimumSplitDepth;
@ -149,5 +146,6 @@ private:
};
extern ThreadsManager Threads;
extern THREAD_LOCAL Thread* this_thread;
#endif // !defined(THREAD_H_INCLUDED)

View file

@ -64,6 +64,12 @@
# define FORCE_INLINE inline
#endif
// THREAD_LOCAL expands to the compiler-specific thread-local storage
// specifier: __thread when __GNUC__ is defined, __declspec(thread) otherwise.
// NOTE(review): the #else branch presumably targets MSVC - confirm that no
// other non-GNU compiler is expected to build this code.
#if defined(__GNUC__)
# define THREAD_LOCAL __thread
#else
# define THREAD_LOCAL __declspec(thread)
#endif
#if defined(USE_POPCNT)
const bool HasPopCnt = true;
#else