
Improve SMP performance for high numbers of threads

Balance threads between split points.

There are huge differences between machines, and autopurging makes it very difficult to measure the improvement in fishtest, but the following was recorded for 16 threads at 15+0.05:

    For Bravone (1000 games): 0 ELO
    For Glinscott (1000 games): +20 ELO
    For bKingUs (1000 games): +50 ELO
    For fastGM (1500 games): +50 ELO

The change was a regression for no one and a big improvement for some, so it should be fine to commit.
Also, for 8 threads at 15+0.05, we measured a statistically significant improvement:
ELO: 6.19 +-3.9 (95%) LOS: 99.9%
Total: 10325 W: 1824 L: 1640 D: 6861

Finally, it was verified that there was no (significant) regression for:

4 threads:
ELO: 0.09 +-2.8 (95%) LOS: 52.4%
Total: 19908 W: 3422 L: 3417 D: 13069

2 threads:
ELO: 0.38 +-3.0 (95%) LOS: 60.0%
Total: 19044 W: 3480 L: 3459 D: 12105

1 thread:
ELO: -1.27 +-2.1 (95%) LOS: 12.3%
Total: 40000 W: 7829 L: 7975 D: 24196
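
For reference, the Elo and LOS figures above can be reproduced from the raw W/L/D counts with the usual logistic model. A minimal sketch (my reconstruction for illustration, not fishtest's actual code):

    #include <cmath>
    #include <cstdio>

    int main() {
        // 8-thread run quoted above: Total: 10325 W: 1824 L: 1640 D: 6861
        const double W = 1824, L = 1640, D = 6861;
        const double score = (W + D / 2) / (W + L + D);  // scoring rate
        // Logistic Elo model: score = 1 / (1 + 10^(-elo/400))
        const double elo = -400 * std::log10(1 / score - 1);
        // Likelihood of superiority; draws cancel out of the W-L difference
        const double los = 0.5 * (1 + std::erf((W - L) / std::sqrt(2 * (W + L))));
        std::printf("ELO %.2f  LOS %.1f%%\n", elo, 100 * los);  // ELO 6.19  LOS 99.9%
        return 0;
    }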

Resolves #258
Joona Kiiski
2015-02-14 20:46:00 +00:00
parent f8f5dcbb68
commit d65f75c153
3 changed files with 45 additions and 15 deletions

src/search.cpp

@@ -1042,7 +1042,9 @@ moves_loop: // When in check and at SpNode search starts from here
           && Threads.size() >= 2
           && depth >= Threads.minimumSplitDepth
           && (   !thisThread->activeSplitPoint
-              || !thisThread->activeSplitPoint->allSlavesSearching)
+              || !thisThread->activeSplitPoint->allSlavesSearching
+              || (   int(Threads.size()) > MAX_SLAVES_PER_SPLITPOINT
+                  && thisThread->activeSplitPoint->slavesCount == MAX_SLAVES_PER_SPLITPOINT))
           && thisThread->splitPointsSize < MAX_SPLITPOINTS_PER_THREAD)
       {
           assert(bestValue > -VALUE_INFINITE && bestValue < beta);
@@ -1587,6 +1589,11 @@ void Thread::idle_loop() {
           // Try to late join to another split point if none of its slaves has
           // already finished.
           if (Threads.size() > 2)
+          {
+              SplitPoint *bestSp = NULL;
+              int bestThread = 0;
+              int bestScore = INT_MAX;
+
               for (size_t i = 0; i < Threads.size(); ++i)
               {
                   const int size = Threads[i]->splitPointsSize; // Local copy
@@ -1594,26 +1601,42 @@ void Thread::idle_loop() {

                   if (   sp
                       && sp->allSlavesSearching
+                      && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT
                       && available_to(Threads[i]))
                   {
-                      // Recheck the conditions under lock protection
-                      Threads.mutex.lock();
-                      sp->mutex.lock();
+                      int score = sp->spLevel * 256 * 256 + sp->slavesCount * 256 - sp->depth * 1;

-                      if (   sp->allSlavesSearching
-                          && available_to(Threads[i]))
+                      if (score < bestScore)
                       {
-                          sp->slavesMask.set(idx);
-                          activeSplitPoint = sp;
-                          searching = true;
+                          bestSp = sp;
+                          bestThread = i;
+                          bestScore = score;
                       }
-
-                      sp->mutex.unlock();
-                      Threads.mutex.unlock();
-
-                      break; // Just a single attempt
                   }
               }
+
+              if (bestSp)
+              {
+                  SplitPoint* sp = bestSp;
+
+                  // Recheck the conditions under lock protection
+                  Threads.mutex.lock();
+                  sp->mutex.lock();
+
+                  if (   sp->allSlavesSearching
+                      && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT
+                      && available_to(Threads[bestThread]))
+                  {
+                      sp->slavesMask.set(idx);
+                      sp->slavesCount++;
+                      activeSplitPoint = sp;
+                      searching = true;
+                  }
+
+                  sp->mutex.unlock();
+                  Threads.mutex.unlock();
+              }
+          }

           // Grab the lock to avoid races with Thread::notify_one()
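
The score computed in the loop above packs three criteria into one int so that a single `<` comparison ranks candidate split points: lower spLevel (nearer the root) dominates, then fewer slaves, then greater remaining depth (the minus sign, and the `* 1` just spells out the third weight). This is my reading of the multipliers; the packing behaves lexicographically only while slavesCount and depth stay below 256, which the new MAX_SLAVES_PER_SPLITPOINT cap and practical search depths guarantee. A stand-alone sketch of the ordering (late_join_score is my name for illustration):

    #include <cassert>

    // Packed lexicographic key, as in the idle_loop() change above:
    // lower split-point level first, then fewer slaves, then more depth.
    int late_join_score(int spLevel, int slavesCount, int depth) {
        return spLevel * 256 * 256 + slavesCount * 256 - depth;
    }

    int main() {
        // A split point nearer the root always wins...
        assert(late_join_score(0, 3, 10) < late_join_score(1, 0, 99));
        // ...at equal level, the one with fewer slaves wins...
        assert(late_join_score(1, 1, 10) < late_join_score(1, 2, 99));
        // ...and remaining ties go to the deeper split point.
        assert(late_join_score(1, 2, 20) < late_join_score(1, 2, 10));
        return 0;
    }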

src/thread.cpp

@@ -154,7 +154,9 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
   sp.masterThread = this;
   sp.parentSplitPoint = activeSplitPoint;
+  sp.spLevel = activeSplitPoint ? activeSplitPoint->spLevel + 1 : 0;
   sp.slavesMask = 0, sp.slavesMask.set(idx);
+  sp.slavesCount = 1;
   sp.depth = depth;
   sp.bestValue = *bestValue;
   sp.bestMove = *bestMove;
@@ -182,9 +184,11 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
   Thread* slave;

-  while ((slave = Threads.available_slave(this)) != NULL)
+  while (   sp.slavesCount < MAX_SLAVES_PER_SPLITPOINT
+         && (slave = Threads.available_slave(this)) != NULL)
   {
       sp.slavesMask.set(slave->idx);
+      sp.slavesCount++;
       slave->activeSplitPoint = &sp;
       slave->searching = true; // Slave leaves idle_loop()
       slave->notify_one(); // Could be sleeping

src/thread.h

@@ -33,6 +33,7 @@ struct Thread;

 const int MAX_THREADS = 128;
 const int MAX_SPLITPOINTS_PER_THREAD = 8;
+const int MAX_SLAVES_PER_SPLITPOINT = 4;

 /// Mutex and ConditionVariable struct are wrappers of the low level locking
 /// machinery and are modeled after the corresponding C++11 classes.
@@ -72,6 +73,7 @@ struct SplitPoint {
   const Position* pos;
   Search::Stack* ss;
   Thread* masterThread;
+  int spLevel;
   Depth depth;
   Value beta;
   int nodeType;
@@ -84,6 +86,7 @@ struct SplitPoint {
   // Shared variable data
   Mutex mutex;
   std::bitset<MAX_THREADS> slavesMask;
+  int slavesCount;
   volatile bool allSlavesSearching;
   volatile uint64_t nodes;
   volatile Value alpha;
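
A closing note on the two new SplitPoint fields: slavesCount mirrors slavesMask, and every path that sets a bit in the mask (Thread::split and the late-join code above) bumps the counter under the same lock, presumably so the cap test can read a plain int rather than popcount the bitset. Since it is initialized to 1 for the master, MAX_SLAVES_PER_SPLITPOINT = 4 bounds the total number of threads on a split point, master included. A hypothetical stand-alone illustration of that invariant (MiniSplitPoint and try_add_slave are made-up names, not the engine's):

    #include <bitset>
    #include <cassert>

    const int MAX_THREADS = 128;
    const int MAX_SLAVES_PER_SPLITPOINT = 4;

    struct MiniSplitPoint {
        std::bitset<MAX_THREADS> slavesMask;
        int slavesCount;

        explicit MiniSplitPoint(int masterIdx) : slavesCount(1) {
            slavesMask.set(masterIdx);     // the master counts toward the cap
        }

        bool try_add_slave(int idx) {
            if (slavesCount >= MAX_SLAVES_PER_SPLITPOINT)
                return false;              // split point is full
            slavesMask.set(idx);
            slavesCount++;                 // kept in lock-step with the mask
            return true;
        }
    };

    int main() {
        MiniSplitPoint sp(0);              // thread 0 is the master
        for (int i = 1; i < 8; ++i)
            sp.try_add_slave(i);           // only threads 1..3 succeed
        assert(sp.slavesCount == 4 && sp.slavesMask.count() == 4);
        return 0;
    }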