
Use spinlock instead of mutex for Threads and SplitPoint

It is reported to be definitely faster as the number of
threads increases: we go from +3.5% with 4 threads to
+15% with 16 threads.

The only drawback is that now, when testing with more
threads than physically available cores, the speed slows
down to a crawl. This is expected and is similar to what
we had when setting the old sleepingThreads option to false.

No functional change.
Marco Costalba 2015-02-22 14:59:55 +01:00
parent 775f8239d3
commit 38112060dc
3 changed files with 41 additions and 41 deletions
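
For readers unfamiliar with the trade-off: a spin lock busy-waits on an atomic flag entirely in user space, so handing work between searching threads avoids the kernel round-trip a contended std::mutex can incur, but a waiting thread keeps burning its core. A minimal annotated sketch of the pattern (it mirrors the Spinlock class this commit adds to thread.h below):

    #include <atomic>

    // Spin lock in the style added by this commit: acquire() loops in user
    // space until the flag was previously clear; release() clears it and
    // publishes the writes made inside the critical section.
    class Spinlock {
      std::atomic_flag lock;
    public:
      Spinlock() { std::atomic_flag_clear(&lock); }
      void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
      void release() { lock.clear(std::memory_order_release); }
    };

With one searching thread per core the spin is almost always short; with more threads than cores the lock holder can be descheduled while the waiters keep spinning, which matches the slowdown described above.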

src/search.cpp

@@ -765,7 +765,7 @@ moves_loop: // When in check and at SpNode search starts from here
              continue;

          moveCount = ++splitPoint->moveCount;
-         splitPoint->mutex.unlock();
+         splitPoint->spinlock.release();
      }
      else
          ++moveCount;
@@ -834,7 +834,7 @@ moves_loop: // When in check and at SpNode search starts from here
              && moveCount >= FutilityMoveCounts[improving][depth])
          {
              if (SpNode)
-                 splitPoint->mutex.lock();
+                 splitPoint->spinlock.acquire();

              continue;
          }
@@ -853,7 +853,7 @@ moves_loop: // When in check and at SpNode search starts from here
              if (SpNode)
              {
-                 splitPoint->mutex.lock();
+                 splitPoint->spinlock.acquire();
                  if (bestValue > splitPoint->bestValue)
                      splitPoint->bestValue = bestValue;
              }
@@ -865,7 +865,7 @@ moves_loop: // When in check and at SpNode search starts from here
          if (predictedDepth < 4 * ONE_PLY && pos.see_sign(move) < VALUE_ZERO)
          {
              if (SpNode)
-                 splitPoint->mutex.lock();
+                 splitPoint->spinlock.acquire();

              continue;
          }
@@ -965,7 +965,7 @@ moves_loop: // When in check and at SpNode search starts from here
          // Step 18. Check for new best move
          if (SpNode)
          {
-             splitPoint->mutex.lock();
+             splitPoint->spinlock.acquire();
              bestValue = splitPoint->bestValue;
              alpha = splitPoint->alpha;
          }
@@ -1526,13 +1526,13 @@ void Thread::idle_loop() {
      // If this thread has been assigned work, launch a search
      while (searching)
      {
-         Threads.mutex.lock();
+         Threads.spinlock.acquire();

          assert(activeSplitPoint);

          SplitPoint* sp = activeSplitPoint;

-         Threads.mutex.unlock();
+         Threads.spinlock.release();

          Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2)
          Position pos(*sp->pos, this);
@@ -1540,7 +1540,7 @@ void Thread::idle_loop() {
          std::memcpy(ss-2, sp->ss-2, 5 * sizeof(Stack));
          ss->splitPoint = sp;

-         sp->mutex.lock();
+         sp->spinlock.acquire();

          assert(activePosition == nullptr);
@@ -1578,7 +1578,7 @@ void Thread::idle_loop() {
          // After releasing the lock we can't access any SplitPoint related data
          // in a safe way because it could have been released under our feet by
          // the sp master.
-         sp->mutex.unlock();
+         sp->spinlock.release();

          // Try to late join to another split point if none of its slaves has
          // already finished.
@@ -1593,7 +1593,7 @@ void Thread::idle_loop() {
              if (   sp
                  && sp->allSlavesSearching
                  && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
-                 && available_to(th))
+                 && available_to(sp->master))
              {
                  assert(this != th);
                  assert(!(this_sp && this_sp->slavesMask.none()));
@@ -1618,8 +1618,8 @@ void Thread::idle_loop() {
                  sp = bestSp;

                  // Recheck the conditions under lock protection
-                 Threads.mutex.lock();
-                 sp->mutex.lock();
+                 Threads.spinlock.acquire();
+                 sp->spinlock.acquire();

                  if (   sp->allSlavesSearching
                      && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
@@ -1630,8 +1630,8 @@ void Thread::idle_loop() {
                      searching = true;
                  }

-                 sp->mutex.unlock();
-                 Threads.mutex.unlock();
+                 sp->spinlock.release();
+                 Threads.spinlock.release();
              }
          }
@@ -1687,7 +1687,7 @@ void check_time() {
      else if (Limits.nodes)
      {
-         Threads.mutex.lock();
+         Threads.spinlock.acquire();

          int64_t nodes = RootPos.nodes_searched();
@@ -1698,7 +1698,7 @@ void check_time() {
          {
              SplitPoint& sp = th->splitPoints[i];

-             sp.mutex.lock();
+             sp.spinlock.acquire();

              nodes += sp.nodes;
@@ -1706,10 +1706,10 @@ void check_time() {
                  if (sp.slavesMask.test(idx) && Threads[idx]->activePosition)
                      nodes += Threads[idx]->activePosition->nodes_searched();

-             sp.mutex.unlock();
+             sp.spinlock.release();
          }

-         Threads.mutex.unlock();
+         Threads.spinlock.release();

          if (nodes >= Limits.nodes)
              Signals.stop = true;

src/thread.cpp

@@ -165,8 +165,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
  // Try to allocate available threads and ask them to start searching setting
  // 'searching' flag. This must be done under lock protection to avoid concurrent
  // allocation of the same slave by another master.
- Threads.mutex.lock();
- sp.mutex.lock();
+ Threads.spinlock.acquire();
+ sp.spinlock.acquire();

  sp.allSlavesSearching = true; // Must be set under lock protection
  ++splitPointsSize;
@@ -188,8 +188,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
  // it will instantly launch a search, because its 'searching' flag is set.
  // The thread will return from the idle loop when all slaves have finished
  // their work at this split point.
- sp.mutex.unlock();
- Threads.mutex.unlock();
+ sp.spinlock.release();
+ Threads.spinlock.release();

  Thread::idle_loop(); // Force a call to base class idle_loop()
@@ -202,8 +202,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
  // We have returned from the idle loop, which means that all threads are
  // finished. Note that setting 'searching' and decreasing splitPointsSize must
  // be done under lock protection to avoid a race with Thread::available_to().
- Threads.mutex.lock();
- sp.mutex.lock();
+ Threads.spinlock.acquire();
+ sp.spinlock.acquire();

  searching = true;
  --splitPointsSize;
@@ -213,8 +213,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
  *bestMove = sp.bestMove;
  *bestValue = sp.bestValue;

- sp.mutex.unlock();
- Threads.mutex.unlock();
+ sp.spinlock.release();
+ Threads.spinlock.release();
 }

src/thread.h

@@ -39,6 +39,19 @@ const size_t MAX_THREADS = 128;
 const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
 const size_t MAX_SLAVES_PER_SPLITPOINT = 4;

+/// Spinlock class wraps low level atomic operations to provide spin lock functionality
+
+class Spinlock {
+
+  std::atomic_flag lock;
+
+public:
+  Spinlock() { std::atomic_flag_clear(&lock); }
+  void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
+  void release() { lock.clear(std::memory_order_release); }
+};
+

 /// SplitPoint struct stores information shared by the threads searching in
 /// parallel below the same split point. It is populated at splitting time.
@@ -58,7 +71,7 @@ struct SplitPoint {
   SplitPoint* parentSplitPoint;

   // Shared variable data
-  std::mutex mutex;
+  Spinlock spinlock;
   std::bitset<MAX_THREADS> slavesMask;
   volatile bool allSlavesSearching;
   volatile uint64_t nodes;
@@ -70,19 +83,6 @@ struct SplitPoint {
 };

-/// Spinlock class wraps low level atomic operations to provide spin lock functionality
-
-class Spinlock {
-
-  std::atomic_flag lock;
-
-public:
-  Spinlock() { std::atomic_flag_clear(&lock); }
-  void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
-  void release() { lock.clear(std::memory_order_release); }
-};
-

 /// ThreadBase struct is the base of the hierarchy from where we derive all the
 /// specialized thread classes.
@@ -162,7 +162,7 @@ struct ThreadPool : public std::vector<Thread*> {
   void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);

   Depth minimumSplitDepth;
-  std::mutex mutex;
+  Spinlock spinlock;
   std::condition_variable sleepCondition;
   TimerThread* timer;
 };
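
As a closing, self-contained usage sketch (not part of the commit; the worker and counter names are illustrative): the whole diff is a mechanical substitution in which every mutex.lock()/unlock() pair becomes spinlock.acquire()/release() around the same short critical sections.

    #include <atomic>
    #include <iostream>
    #include <thread>

    // Same spin lock as added to thread.h.
    class Spinlock {
      std::atomic_flag lock;
    public:
      Spinlock() { std::atomic_flag_clear(&lock); }
      void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
      void release() { lock.clear(std::memory_order_release); }
    };

    int main() {

      Spinlock spinlock;  // was: std::mutex mutex;
      long nodes = 0;     // stand-in for the shared data the real locks protect

      auto worker = [&] {
          for (int i = 0; i < 100000; ++i)
          {
              spinlock.acquire();   // was: mutex.lock();
              ++nodes;              // keep the critical section short, as the real code does
              spinlock.release();   // was: mutex.unlock();
          }
      };

      std::thread t1(worker), t2(worker);
      t1.join();
      t2.join();

      std::cout << nodes << std::endl;  // always 200000: the lock serializes the increments
      return 0;
    }

One consequence of the renamed interface is that std::lock_guard (which requires lock()/unlock()) no longer applies, so call sites pair acquire() and release() by hand, exactly as the hunks above do.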