
Use spinlock instead of mutex for Threads and SplitPoint

It is reported to be definitely faster as the number of
threads increases: we go from +3.5% with 4 threads to
+15% with 16 threads.

The only drawback is that now, when testing with more
threads than physically available cores, the speed slows
down to a crawl. This is expected and is similar to what
we had when setting the old sleepingThreads option to false.

No functional change.
Marco Costalba 2015-02-22 14:59:55 +01:00
parent 775f8239d3
commit 38112060dc
3 changed files with 41 additions and 41 deletions
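
For readers unfamiliar with the trade-off: a spin lock busy-waits on an atomic flag entirely in user space, so handing work between searching threads avoids the kernel round-trip a contended std::mutex can incur, but a waiting thread keeps burning its core. A minimal annotated sketch of the pattern (it mirrors the Spinlock class this commit adds to thread.h below):

    #include <atomic>

    // Spin lock in the style added by this commit: acquire() loops in user
    // space until the flag was previously clear; release() clears it and
    // publishes the writes made inside the critical section.
    class Spinlock {
      std::atomic_flag lock;
    public:
      Spinlock() { std::atomic_flag_clear(&lock); }
      void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
      void release() { lock.clear(std::memory_order_release); }
    };

With one searching thread per core the spin is almost always short; with more threads than cores the lock holder can be descheduled while the waiters keep spinning, which matches the slowdown described above.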

src/search.cpp

@@ -765,7 +765,7 @@ moves_loop: // When in check and at SpNode search starts from here
              continue;

          moveCount = ++splitPoint->moveCount;
-         splitPoint->mutex.unlock();
+         splitPoint->spinlock.release();
      }
      else
          ++moveCount;
@@ -834,7 +834,7 @@ moves_loop: // When in check and at SpNode search starts from here
              && moveCount >= FutilityMoveCounts[improving][depth])
          {
              if (SpNode)
-                 splitPoint->mutex.lock();
+                 splitPoint->spinlock.acquire();

              continue;
          }
@@ -853,7 +853,7 @@ moves_loop: // When in check and at SpNode search starts from here
              if (SpNode)
              {
-                 splitPoint->mutex.lock();
+                 splitPoint->spinlock.acquire();
                  if (bestValue > splitPoint->bestValue)
                      splitPoint->bestValue = bestValue;
              }
@@ -865,7 +865,7 @@ moves_loop: // When in check and at SpNode search starts from here
          if (predictedDepth < 4 * ONE_PLY && pos.see_sign(move) < VALUE_ZERO)
          {
              if (SpNode)
-                 splitPoint->mutex.lock();
+                 splitPoint->spinlock.acquire();

              continue;
          }
@@ -965,7 +965,7 @@ moves_loop: // When in check and at SpNode search starts from here
          // Step 18. Check for new best move
          if (SpNode)
          {
-             splitPoint->mutex.lock();
+             splitPoint->spinlock.acquire();
              bestValue = splitPoint->bestValue;
              alpha = splitPoint->alpha;
          }
@@ -1526,13 +1526,13 @@ void Thread::idle_loop() {
      // If this thread has been assigned work, launch a search
      while (searching)
      {
-         Threads.mutex.lock();
+         Threads.spinlock.acquire();

          assert(activeSplitPoint);

          SplitPoint* sp = activeSplitPoint;

-         Threads.mutex.unlock();
+         Threads.spinlock.release();

          Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2)
          Position pos(*sp->pos, this);
@@ -1540,7 +1540,7 @@ void Thread::idle_loop() {
          std::memcpy(ss-2, sp->ss-2, 5 * sizeof(Stack));
          ss->splitPoint = sp;

-         sp->mutex.lock();
+         sp->spinlock.acquire();

          assert(activePosition == nullptr);
@@ -1578,7 +1578,7 @@ void Thread::idle_loop() {
          // After releasing the lock we can't access any SplitPoint related data
          // in a safe way because it could have been released under our feet by
          // the sp master.
-         sp->mutex.unlock();
+         sp->spinlock.release();

          // Try to late join to another split point if none of its slaves has
          // already finished.
@@ -1593,7 +1593,7 @@ void Thread::idle_loop() {
              if (   sp
                  && sp->allSlavesSearching
                  && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
-                 && available_to(th))
+                 && available_to(sp->master))
              {
                  assert(this != th);
                  assert(!(this_sp && this_sp->slavesMask.none()));
@@ -1618,8 +1618,8 @@ void Thread::idle_loop() {
                  sp = bestSp;

                  // Recheck the conditions under lock protection
-                 Threads.mutex.lock();
-                 sp->mutex.lock();
+                 Threads.spinlock.acquire();
+                 sp->spinlock.acquire();

                  if (   sp->allSlavesSearching
                      && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
@@ -1630,8 +1630,8 @@ void Thread::idle_loop() {
                      searching = true;
                  }

-                 sp->mutex.unlock();
-                 Threads.mutex.unlock();
+                 sp->spinlock.release();
+                 Threads.spinlock.release();
              }
          }
@@ -1687,7 +1687,7 @@ void check_time() {
      else if (Limits.nodes)
      {
-         Threads.mutex.lock();
+         Threads.spinlock.acquire();

          int64_t nodes = RootPos.nodes_searched();
@@ -1698,7 +1698,7 @@ void check_time() {
          {
              SplitPoint& sp = th->splitPoints[i];

-             sp.mutex.lock();
+             sp.spinlock.acquire();

              nodes += sp.nodes;
@@ -1706,10 +1706,10 @@ void check_time() {
                  if (sp.slavesMask.test(idx) && Threads[idx]->activePosition)
                      nodes += Threads[idx]->activePosition->nodes_searched();

-             sp.mutex.unlock();
+             sp.spinlock.release();
          }

-         Threads.mutex.unlock();
+         Threads.spinlock.release();

          if (nodes >= Limits.nodes)
              Signals.stop = true;

src/thread.cpp

@@ -165,8 +165,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
  // Try to allocate available threads and ask them to start searching setting
  // 'searching' flag. This must be done under lock protection to avoid concurrent
  // allocation of the same slave by another master.
- Threads.mutex.lock();
- sp.mutex.lock();
+ Threads.spinlock.acquire();
+ sp.spinlock.acquire();

  sp.allSlavesSearching = true; // Must be set under lock protection
  ++splitPointsSize;
@@ -188,8 +188,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
  // it will instantly launch a search, because its 'searching' flag is set.
  // The thread will return from the idle loop when all slaves have finished
  // their work at this split point.
- sp.mutex.unlock();
- Threads.mutex.unlock();
+ sp.spinlock.release();
+ Threads.spinlock.release();

  Thread::idle_loop(); // Force a call to base class idle_loop()
@@ -202,8 +202,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
  // We have returned from the idle loop, which means that all threads are
  // finished. Note that setting 'searching' and decreasing splitPointsSize must
  // be done under lock protection to avoid a race with Thread::available_to().
- Threads.mutex.lock();
- sp.mutex.lock();
+ Threads.spinlock.acquire();
+ sp.spinlock.acquire();

  searching = true;
  --splitPointsSize;
@@ -213,8 +213,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
  *bestMove = sp.bestMove;
  *bestValue = sp.bestValue;

- sp.mutex.unlock();
- Threads.mutex.unlock();
+ sp.spinlock.release();
+ Threads.spinlock.release();
 }

src/thread.h

@@ -39,6 +39,19 @@ const size_t MAX_THREADS = 128;
 const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
 const size_t MAX_SLAVES_PER_SPLITPOINT = 4;

+/// Spinlock class wraps low level atomic operations to provide spin lock functionality
+
+class Spinlock {
+
+  std::atomic_flag lock;
+
+public:
+  Spinlock() { std::atomic_flag_clear(&lock); }
+  void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
+  void release() { lock.clear(std::memory_order_release); }
+};
+

 /// SplitPoint struct stores information shared by the threads searching in
 /// parallel below the same split point. It is populated at splitting time.
@@ -58,7 +71,7 @@ struct SplitPoint {
   SplitPoint* parentSplitPoint;

   // Shared variable data
-  std::mutex mutex;
+  Spinlock spinlock;
   std::bitset<MAX_THREADS> slavesMask;
   volatile bool allSlavesSearching;
   volatile uint64_t nodes;
@@ -70,19 +83,6 @@ struct SplitPoint {
 };

-/// Spinlock class wraps low level atomic operations to provide spin lock functionality
-
-class Spinlock {
-
-  std::atomic_flag lock;
-
-public:
-  Spinlock() { std::atomic_flag_clear(&lock); }
-  void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
-  void release() { lock.clear(std::memory_order_release); }
-};
-

 /// ThreadBase struct is the base of the hierarchy from where we derive all the
 /// specialized thread classes.
@@ -162,7 +162,7 @@ struct ThreadPool : public std::vector<Thread*> {
   void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);

   Depth minimumSplitDepth;
-  std::mutex mutex;
+  Spinlock spinlock;
   std::condition_variable sleepCondition;
   TimerThread* timer;
 };
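
As a closing, self-contained usage sketch (not part of the commit; the worker and counter names are illustrative): the whole diff is a mechanical substitution in which every mutex.lock()/unlock() pair becomes spinlock.acquire()/release() around the same short critical sections.

    #include <atomic>
    #include <iostream>
    #include <thread>

    // Same spin lock as added to thread.h.
    class Spinlock {
      std::atomic_flag lock;
    public:
      Spinlock() { std::atomic_flag_clear(&lock); }
      void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
      void release() { lock.clear(std::memory_order_release); }
    };

    int main() {

      Spinlock spinlock;  // was: std::mutex mutex;
      long nodes = 0;     // stand-in for the shared data the real locks protect

      auto worker = [&] {
          for (int i = 0; i < 100000; ++i)
          {
              spinlock.acquire();   // was: mutex.lock();
              ++nodes;              // keep the critical section short, as the real code does
              spinlock.release();   // was: mutex.unlock();
          }
      };

      std::thread t1(worker), t2(worker);
      t1.join();
      t2.join();

      std::cout << nodes << std::endl;  // always 200000: the lock serializes the increments
      return 0;
    }

One consequence of the renamed interface is that std::lock_guard (which requires lock()/unlock()) no longer applies, so call sites pair acquire() and release() by hand, exactly as the hunks above do.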