mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
Lazy SMP
Start all threads searching on the root position and use only the shared transposition table (TT) as the synchronization scheme. It seems this scheme scales better than YBWC for a high number of threads. Verified for no regression at STC 3 threads LLR: -2.95 (-2.94,2.94) [-3.00,1.00] Total: 40232 W: 6908 L: 7130 D: 26194 Verified for no regression at LTC 3 threads LLR: 2.95 (-2.94,2.94) [-3.00,1.00] Total: 28186 W: 3908 L: 3798 D: 20480 Verified for no regression at STC 7 threads LLR: 2.95 (-2.94,2.94) [-3.00,1.00] Total: 3607 W: 674 L: 526 D: 2407 Verified for no regression at LTC 7 threads LLR: 2.95 (-2.94,2.94) [-3.00,1.00] Total: 4235 W: 671 L: 528 D: 3036 Tested with fixed games at LTC with 20 threads ELO: 44.75 +-7.6 (95%) LOS: 100.0% Total: 2069 W: 407 L: 142 D: 1520 Tested with fixed games at XLTC (120secs) with 20 threads ELO: 28.01 +-6.7 (95%) LOS: 100.0% Total: 2275 W: 349 L: 166 D: 1760 Original patch by mbootsector, with additional work from Ivan Ivec (log formula), Joerg Oster (id loop simplification) and Marco Costalba (assorted formatting and rework). Bench: 8116244
This commit is contained in:
parent
7ea5659c5f
commit
ecc5ff6693
10 changed files with 365 additions and 750 deletions
|
@ -156,9 +156,10 @@ void benchmark(const Position& current, istream& is) {
|
|||
else
|
||||
{
|
||||
Search::StateStackPtr st;
|
||||
limits.startTime = now();
|
||||
Threads.start_thinking(pos, limits, st);
|
||||
Threads.main()->join();
|
||||
nodes += Search::RootPos.nodes_searched();
|
||||
nodes += Threads.nodes_searched();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -238,8 +238,8 @@ void MovePicker::generate_next_stage() {
|
|||
/// a new pseudo legal move every time it is called, until there are no more moves
|
||||
/// left. It picks the move with the biggest value from a list of generated moves
|
||||
/// taking care not to return the ttMove if it has already been searched.
|
||||
template<>
|
||||
Move MovePicker::next_move<false>() {
|
||||
|
||||
Move MovePicker::next_move() {
|
||||
|
||||
Move move;
|
||||
|
||||
|
@ -320,10 +320,3 @@ Move MovePicker::next_move<false>() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Version of next_move() to use at split point nodes where the move is grabbed
|
||||
/// from the split point's shared MovePicker object. This function is not thread
|
||||
/// safe so must be lock protected by the caller.
|
||||
/// Split-point specialization: it does no move picking of its own and simply
/// forwards to next_move<false>() on the MovePicker stored in this node's
/// split point (ss->splitPoint->movePicker), returning whatever that yields.
template<>
|
||||
Move MovePicker::next_move<true>() { return ss->splitPoint->movePicker->next_move<false>(); }
|
||||
|
|
|
@ -92,7 +92,7 @@ public:
|
|||
MovePicker(const Position&, Move, const HistoryStats&, const CounterMovesHistoryStats&, Value);
|
||||
MovePicker(const Position&, Move, Depth, const HistoryStats&, const CounterMovesHistoryStats&, Move, Search::Stack*);
|
||||
|
||||
template<bool SpNode> Move next_move();
|
||||
Move next_move();
|
||||
|
||||
private:
|
||||
template<GenType> void score();
|
||||
|
|
807
src/search.cpp
807
src/search.cpp
File diff suppressed because it is too large
Load diff
|
@ -88,6 +88,7 @@ struct LimitsType {
|
|||
std::vector<Move> searchmoves;
|
||||
int time[COLOR_NB], inc[COLOR_NB], npmsec, movestogo, depth, movetime, mate, infinite, ponder;
|
||||
int64_t nodes;
|
||||
TimePoint startTime;
|
||||
};
|
||||
|
||||
/// The SignalsType struct stores volatile flags updated during the search
|
||||
|
@ -101,12 +102,9 @@ typedef std::unique_ptr<std::stack<StateInfo>> StateStackPtr;
|
|||
|
||||
extern volatile SignalsType Signals;
|
||||
extern LimitsType Limits;
|
||||
extern RootMoveVector RootMoves;
|
||||
extern Position RootPos;
|
||||
extern StateStackPtr SetupStates;
|
||||
|
||||
void init();
|
||||
void think();
|
||||
void reset();
|
||||
template<bool Root> uint64_t perft(Position& pos, Depth depth);
|
||||
|
||||
|
|
200
src/thread.cpp
200
src/thread.cpp
|
@ -66,15 +66,24 @@ void ThreadBase::notify_one() {
|
|||
}
|
||||
|
||||
|
||||
// ThreadBase::wait_for() set the thread to sleep until 'condition' turns true
|
||||
// ThreadBase::wait() puts the thread to sleep until 'condition' turns true
|
||||
|
||||
void ThreadBase::wait_for(volatile const bool& condition) {
|
||||
void ThreadBase::wait(volatile const bool& condition) {
|
||||
|
||||
std::unique_lock<Mutex> lk(mutex);
|
||||
sleepCondition.wait(lk, [&]{ return condition; });
|
||||
}
|
||||
|
||||
|
||||
// ThreadBase::wait_while() puts the thread to sleep until 'condition' turns false
|
||||
|
||||
// Blocks the calling thread on 'sleepCondition' for as long as 'condition'
// reads true. 'condition' is captured by reference, so the predicate re-reads
// the caller's flag every time it is evaluated and the wait ends once it
// reads false.
void ThreadBase::wait_while(volatile const bool& condition) {
|
||||

||||
// Take this object's 'mutex' for the wait. NOTE(review): assumes
// ConditionVariable has std::condition_variable semantics, i.e. the lock is
// dropped while sleeping and re-taken before the predicate runs - confirm.
std::unique_lock<Mutex> lk(mutex);
|
||||
sleepCondition.wait(lk, [&]{ return !condition; });
|
||||
}
|
||||
|
||||
|
||||
// Thread c'tor makes some init but does not launch any execution thread that
|
||||
// will be started only when c'tor returns.
|
||||
|
||||
|
@ -82,143 +91,10 @@ Thread::Thread() /* : splitPoints() */ { // Initialization of non POD broken in
|
|||
|
||||
searching = false;
|
||||
maxPly = 0;
|
||||
splitPointsSize = 0;
|
||||
activeSplitPoint = nullptr;
|
||||
activePosition = nullptr;
|
||||
idx = Threads.size(); // Starts from 0
|
||||
}
|
||||
|
||||
|
||||
// Thread::cutoff_occurred() checks whether a beta cutoff has occurred in the
|
||||
// current active split point, or in some ancestor of the split point.
|
||||
|
||||
// Returns true if the 'cutoff' flag is set on this thread's active split point
// or on any split point reachable from it through 'parentSplitPoint' links.
// Returns false when no flag is set along the chain, including the case where
// 'activeSplitPoint' is null.
bool Thread::cutoff_occurred() const {
|
||||

||||
// Walk from the active split point towards the root of the chain; the loop
// ends when a null parent is reached.
for (SplitPoint* sp = activeSplitPoint; sp; sp = sp->parentSplitPoint)
|
||||
if (sp->cutoff)
|
||||
return true;
|
||||

||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Thread::can_join() checks whether the thread is available to join the split
|
||||
// point 'sp'. An obvious requirement is that thread must be idle. With more than
|
||||
// two threads, this is not sufficient: If the thread is the master of some split
|
||||
// point, it is only available as a slave for the split points below his active
|
||||
// one (the "helpful master" concept in YBWC terminology).
|
||||
|
||||
// Reports whether this thread may be booked as a slave for split point 'sp'.
// Returns false while 'searching' is set. Otherwise returns true when the
// thread owns no split points, or when the slavesMask of its most recent split
// point (splitPoints[size - 1]) has the bit of sp's master thread set.
// 'sp' and 'sp->master' are dereferenced without a null check.
bool Thread::can_join(const SplitPoint* sp) const {
|
||||

||||
// A thread that is already searching is never available
if (searching)
|
||||
return false;
|
||||

||||
// Make a local copy to be sure it doesn't become zero under our feet while
|
||||
// testing next condition and so leading to an out of bounds access.
|
||||
const size_t size = splitPointsSize;
|
||||

||||
// No split points means that the thread is available as a slave for any
|
||||
// other thread otherwise apply the "helpful master" concept if possible.
|
||||
return !size || splitPoints[size - 1].slavesMask.test(sp->master->idx);
|
||||
}
|
||||
|
||||
|
||||
// Thread::split() does the actual work of distributing the work at a node between
|
||||
// several available threads. If it does not succeed in splitting the node
|
||||
// (because no idle threads are available), the function immediately returns.
|
||||
// If splitting is possible, a SplitPoint object is initialized with all the
|
||||
// data that must be copied to the helper threads and then helper threads are
|
||||
// informed that they have been assigned work. This will cause them to instantly
|
||||
// leave their idle loops and call search(). When all threads have returned from
|
||||
// search() then split() returns.
|
||||
|
||||
// Sets up a new split point for the node described by the arguments, books
// available threads as slaves and then runs the idle loop until the work at
// the split point is finished.
//
// Steps, in order:
//  1. Take the next unused slot splitPoints[splitPointsSize] and fill it in
//     while holding that slot's spinlock.
//  2. Publish it: increment splitPointsSize, make it the activeSplitPoint and
//     clear activePosition.
//  3. Book slaves through Threads.available_slave() until none is returned or
//     MAX_SLAVES_PER_SPLITPOINT bits are set in slavesMask (the master's own
//     bit counts). can_join() is re-checked under each slave's spinlock.
//  4. Release the split point's spinlock and enter Thread::idle_loop().
//  5. On return, restore this thread's state under its own spinlock, add
//     sp.nodes to pos's node counter and write the split point's result back
//     through the output pointers.
//
// pos, ss, alpha, beta, depth, moveCount, movePicker, nodeType and cutNode
// are copied into the split point as-is.
// bestValue, bestMove: in/out. Read to seed the split point and overwritten
// with sp.bestValue / sp.bestMove before returning.
void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bestValue,
|
||||
Move* bestMove, Depth depth, int moveCount,
|
||||
MovePicker* movePicker, int nodeType, bool cutNode) {
|
||||

||||
assert(searching);
|
||||
assert(-VALUE_INFINITE < *bestValue && *bestValue <= alpha && alpha < beta && beta <= VALUE_INFINITE);
|
||||
assert(depth >= Threads.minimumSplitDepth);
|
||||
assert(splitPointsSize < MAX_SPLITPOINTS_PER_THREAD);
|
||||

||||
// Pick and init the next available split point
|
||||
SplitPoint& sp = splitPoints[splitPointsSize];
|
||||

||||
sp.spinlock.acquire(); // No contention here until splitPointsSize is incremented
|
||||

||||
sp.master = this;
|
||||
sp.parentSplitPoint = activeSplitPoint;
|
||||
sp.slavesMask = 0, sp.slavesMask.set(idx);
|
||||
sp.depth = depth;
|
||||
sp.bestValue = *bestValue;
|
||||
sp.bestMove = *bestMove;
|
||||
sp.alpha = alpha;
|
||||
sp.beta = beta;
|
||||
sp.nodeType = nodeType;
|
||||
sp.cutNode = cutNode;
|
||||
sp.movePicker = movePicker;
|
||||
sp.moveCount = moveCount;
|
||||
sp.pos = &pos;
|
||||
sp.nodes = 0;
|
||||
sp.cutoff = false;
|
||||
sp.ss = ss;
|
||||
sp.allSlavesSearching = true; // Must be set under lock protection
|
||||

||||
++splitPointsSize;
|
||||
activeSplitPoint = &sp;
|
||||
activePosition = nullptr;
|
||||

||||
// Try to allocate available threads
|
||||
Thread* slave;
|
||||

||||
while ( sp.slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
|
||||
&& (slave = Threads.available_slave(&sp)) != nullptr)
|
||||
{
|
||||
slave->spinlock.acquire();
|
||||

||||
// available_slave() checked can_join() without holding the slave's lock, so
// the answer is checked again here before the slave is actually booked.
if (slave->can_join(activeSplitPoint))
|
||||
{
|
||||
activeSplitPoint->slavesMask.set(slave->idx);
|
||||
slave->activeSplitPoint = activeSplitPoint;
|
||||
slave->searching = true;
|
||||
}
|
||||

||||
slave->spinlock.release();
|
||||
}
|
||||

||||
// Everything is set up. The master thread enters the idle loop, from which
|
||||
// it will instantly launch a search, because its 'searching' flag is set.
|
||||
// The thread will return from the idle loop when all slaves have finished
|
||||
// their work at this split point.
|
||||
sp.spinlock.release();
|
||||

||||
Thread::idle_loop(); // Force a call to base class idle_loop()
|
||||

||||
// In the helpful master concept, a master can help only a sub-tree of its
|
||||
// split point and because everything is finished here, it's not possible
|
||||
// for the master to be booked.
|
||||
assert(!searching);
|
||||
assert(!activePosition);
|
||||

||||
// We have returned from the idle loop, which means that all threads are
|
||||
// finished. Note that decreasing splitPointsSize must be done under lock
|
||||
// protection to avoid a race with Thread::can_join().
|
||||
spinlock.acquire();
|
||||

||||
searching = true;
|
||||
--splitPointsSize;
|
||||
activeSplitPoint = sp.parentSplitPoint;
|
||||
activePosition = &pos;
|
||||

||||
spinlock.release();
|
||||

||||
// Split point data cannot be changed now, so no need to lock protect
|
||||
pos.set_nodes_searched(pos.nodes_searched() + sp.nodes);
|
||||
*bestMove = sp.bestMove;
|
||||
*bestValue = sp.bestValue;
|
||||
}
|
||||
|
||||
|
||||
// TimerThread::idle_loop() is where the timer thread waits Resolution milliseconds
|
||||
// and then calls check_time(). When not searching, thread sleeps until it's woken up.
|
||||
|
||||
|
@ -233,12 +109,31 @@ void TimerThread::idle_loop() {
|
|||
|
||||
lk.unlock();
|
||||
|
||||
if (run)
|
||||
if (!exit && run)
|
||||
check_time();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Thread::idle_loop() is where the thread is parked when it has no work to do
|
||||
|
||||
// Parks the thread until there is work or it is asked to terminate. The outer
// loop runs until 'exit' is set. Each pass sleeps on 'sleepCondition' while
// neither 'searching' nor 'exit' is set, then calls search() if 'searching'
// is set and 'exit' is not.
void Thread::idle_loop() {
|
||||

||||
while (!exit)
|
||||
{
|
||||
std::unique_lock<Mutex> lk(mutex);
|
||||

||||
// Re-test both flags after every wake-up, so a wake-up that changed neither
// flag just goes back to sleep.
while (!searching && !exit)
|
||||
sleepCondition.wait(lk);
|
||||

||||
// Drop the mutex before searching so it is not held during search()
lk.unlock();
|
||||

||||
if (!exit && searching)
|
||||
search();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// MainThread::idle_loop() is where the main thread is parked waiting to be started
|
||||
// when there is a new search. The main thread will launch all the slave threads.
|
||||
|
||||
|
@ -259,20 +154,12 @@ void MainThread::idle_loop() {
|
|||
lk.unlock();
|
||||
|
||||
if (!exit)
|
||||
{
|
||||
searching = true;
|
||||
|
||||
Search::think();
|
||||
|
||||
assert(searching);
|
||||
|
||||
searching = false;
|
||||
}
|
||||
think();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// MainThread::join() waits for main thread to finish the search
|
||||
// MainThread::join() waits for main thread to finish thinking
|
||||
|
||||
void MainThread::join() {
|
||||
|
||||
|
@ -317,7 +204,6 @@ void ThreadPool::exit() {
|
|||
|
||||
void ThreadPool::read_uci_options() {
|
||||
|
||||
minimumSplitDepth = Options["Min Split Depth"] * ONE_PLY;
|
||||
size_t requested = Options["Threads"];
|
||||
|
||||
assert(requested > 0);
|
||||
|
@ -333,16 +219,14 @@ void ThreadPool::read_uci_options() {
|
|||
}
|
||||
|
||||
|
||||
// ThreadPool::available_slave() tries to find an idle thread which is available
|
||||
// to join SplitPoint 'sp'.
|
||||
// ThreadPool::nodes_searched() returns the number of nodes searched
|
||||
|
||||
Thread* ThreadPool::available_slave(const SplitPoint* sp) const {
|
||||
int64_t ThreadPool::nodes_searched() {
|
||||
|
||||
for (Thread* th : *this)
|
||||
if (th->can_join(sp))
|
||||
return th;
|
||||
|
||||
return nullptr;
|
||||
int64_t nodes = 0;
|
||||
for (Thread *th : *this)
|
||||
nodes += th->rootPos.nodes_searched();
|
||||
return nodes;
|
||||
}
|
||||
|
||||
|
||||
|
@ -356,8 +240,8 @@ void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits,
|
|||
Signals.stopOnPonderhit = Signals.firstRootMove = false;
|
||||
Signals.stop = Signals.failedLowAtRoot = false;
|
||||
|
||||
RootMoves.clear();
|
||||
RootPos = pos;
|
||||
main()->rootMoves.clear();
|
||||
main()->rootPos = pos;
|
||||
Limits = limits;
|
||||
if (states.get()) // If we don't set a new position, preserve current state
|
||||
{
|
||||
|
@ -368,7 +252,7 @@ void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits,
|
|||
for (const auto& m : MoveList<LEGAL>(pos))
|
||||
if ( limits.searchmoves.empty()
|
||||
|| std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
|
||||
RootMoves.push_back(RootMove(m));
|
||||
main()->rootMoves.push_back(RootMove(m));
|
||||
|
||||
main()->thinking = true;
|
||||
main()->notify_one(); // Wake up main thread: 'thinking' must be already set
|
||||
|
|
75
src/thread.h
75
src/thread.h
|
@ -37,53 +37,6 @@
|
|||
struct Thread;
|
||||
|
||||
const size_t MAX_THREADS = 128;
|
||||
const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
|
||||
const size_t MAX_SLAVES_PER_SPLITPOINT = 4;
|
||||
|
||||
/// Spinlock is a busy-waiting lock built on a single atomic integer. The value
/// 1 means "free" (set by the constructor and by release()). acquire() takes
/// the lock by decrementing the counter and succeeds only if the value it saw
/// before the decrement was 1.
class Spinlock {
|
||||

||||
std::atomic_int lock;
|
||||

||||
public:
|
||||
Spinlock() { lock = 1; } // Init here to workaround a bug with MSVC 2013
|
||||
// Spins until the lock is taken. A failed attempt still decrements the
// counter, so it can go negative while contended; release() resets it to 1.
void acquire() {
|
||||
while (lock.fetch_sub(1, std::memory_order_acquire) != 1)
|
||||
// Wait with plain loads until the counter turns positive again, then retry
// the decrement in the outer loop.
while (lock.load(std::memory_order_relaxed) <= 0)
|
||||
std::this_thread::yield(); // Be nice to hyperthreading
|
||||
}
|
||||
// Marks the lock free by storing 1, regardless of the current counter value
void release() { lock.store(1, std::memory_order_release); }
|
||||
};
|
||||
|
||||
|
||||
/// SplitPoint struct stores information shared by the threads searching in
|
||||
/// parallel below the same split point. It is populated at splitting time.
|
||||
|
||||
// All fields are filled in by Thread::split() while holding 'spinlock'.
struct SplitPoint {
|
||||

||||
// Const data after split point has been setup
|
||||
const Position* pos;
|
||||
Search::Stack* ss;
|
||||
Thread* master; // Thread that called split() and owns this split point
|
||||
Depth depth;
|
||||
Value beta;
|
||||
int nodeType;
|
||||
bool cutNode;
|
||||

||||
// Const pointers to shared data
|
||||
MovePicker* movePicker;
|
||||
SplitPoint* parentSplitPoint; // Master's previously active split point, may be null
|
||||

||||
// Shared variable data
|
||||
Spinlock spinlock;
|
||||
std::bitset<MAX_THREADS> slavesMask; // One bit per Thread::idx; master's bit is set in split()
|
||||
volatile bool allSlavesSearching;
|
||||
volatile uint64_t nodes; // Starts at 0; added to the master position's node count after the split
|
||||
volatile Value alpha;
|
||||
volatile Value bestValue; // Copied back to the caller of split() on return
|
||||
volatile Move bestMove; // Copied back to the caller of split() on return
|
||||
volatile int moveCount;
|
||||
volatile bool cutoff; // Tested by Thread::cutoff_occurred() along the parent chain
|
||||
};
|
||||
|
||||
|
||||
/// ThreadBase struct is the base of the hierarchy from where we derive all the
|
||||
|
@ -94,10 +47,10 @@ struct ThreadBase : public std::thread {
|
|||
virtual ~ThreadBase() = default;
|
||||
virtual void idle_loop() = 0;
|
||||
void notify_one();
|
||||
void wait_for(volatile const bool& b);
|
||||
void wait(volatile const bool& b);
|
||||
void wait_while(volatile const bool& b);
|
||||
|
||||
Mutex mutex;
|
||||
Spinlock spinlock;
|
||||
ConditionVariable sleepCondition;
|
||||
volatile bool exit = false;
|
||||
};
|
||||
|
@ -112,22 +65,21 @@ struct Thread : public ThreadBase {
|
|||
|
||||
Thread();
|
||||
virtual void idle_loop();
|
||||
bool cutoff_occurred() const;
|
||||
bool can_join(const SplitPoint* sp) const;
|
||||
void search(bool isMainThread = false);
|
||||
|
||||
void split(Position& pos, Search::Stack* ss, Value alpha, Value beta, Value* bestValue, Move* bestMove,
|
||||
Depth depth, int moveCount, MovePicker* movePicker, int nodeType, bool cutNode);
|
||||
|
||||
SplitPoint splitPoints[MAX_SPLITPOINTS_PER_THREAD];
|
||||
Pawns::Table pawnsTable;
|
||||
Material::Table materialTable;
|
||||
Endgames endgames;
|
||||
Position* activePosition;
|
||||
size_t idx;
|
||||
size_t idx, PVIdx;
|
||||
int maxPly;
|
||||
SplitPoint* volatile activeSplitPoint;
|
||||
volatile size_t splitPointsSize;
|
||||
volatile bool searching;
|
||||
|
||||
Position rootPos;
|
||||
Search::RootMoveVector rootMoves;
|
||||
Search::Stack stack[MAX_PLY+4];
|
||||
HistoryStats History;
|
||||
MovesStats Countermoves;
|
||||
Depth depth;
|
||||
};
|
||||
|
||||
|
||||
|
@ -137,6 +89,7 @@ struct Thread : public ThreadBase {
|
|||
// MainThread extends Thread with its own idle loop, a join() used by callers
// to wait for it, and the think() entry point invoked from its idle loop.
struct MainThread : public Thread {
|
||||
virtual void idle_loop();
|
||||
void join(); // Waits for the main thread to finish thinking
|
||||
void think();
|
||||
// Set to true by ThreadPool::start_thinking() before the thread is notified
volatile bool thinking = true; // Avoid a race with start_thinking()
|
||||
};
|
||||
|
||||
|
@ -161,10 +114,8 @@ struct ThreadPool : public std::vector<Thread*> {
|
|||
|
||||
MainThread* main() { return static_cast<MainThread*>(at(0)); }
|
||||
void read_uci_options();
|
||||
Thread* available_slave(const SplitPoint* sp) const;
|
||||
void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);
|
||||
|
||||
Depth minimumSplitDepth;
|
||||
int64_t nodes_searched();
|
||||
TimerThread* timer;
|
||||
};
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ namespace {
|
|||
/// inc > 0 && movestogo == 0 means: x basetime + z increment
|
||||
/// inc > 0 && movestogo != 0 means: x moves in y minutes + z increment
|
||||
|
||||
void TimeManagement::init(Search::LimitsType& limits, Color us, int ply, TimePoint now)
|
||||
void TimeManagement::init(Search::LimitsType& limits, Color us, int ply)
|
||||
{
|
||||
int minThinkingTime = Options["Minimum Thinking Time"];
|
||||
int moveOverhead = Options["Move Overhead"];
|
||||
|
@ -102,7 +102,7 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply, TimePoi
|
|||
limits.npmsec = npmsec;
|
||||
}
|
||||
|
||||
start = now;
|
||||
startTime = limits.startTime;
|
||||
unstablePvFactor = 1;
|
||||
optimumTime = maximumTime = std::max(limits.time[us], minThinkingTime);
|
||||
|
||||
|
|
|
@ -22,22 +22,23 @@
|
|||
|
||||
#include "misc.h"
|
||||
#include "search.h"
|
||||
#include "thread.h"
|
||||
|
||||
/// The TimeManagement class computes the optimal time to think depending on
|
||||
/// the maximum available time, the game move number and other parameters.
|
||||
|
||||
class TimeManagement {
|
||||
public:
|
||||
void init(Search::LimitsType& limits, Color us, int ply, TimePoint now);
|
||||
void init(Search::LimitsType& limits, Color us, int ply);
|
||||
void pv_instability(double bestMoveChanges) { unstablePvFactor = 1 + bestMoveChanges; }
|
||||
int available() const { return int(optimumTime * unstablePvFactor * 0.76); }
|
||||
int maximum() const { return maximumTime; }
|
||||
int elapsed() const { return int(Search::Limits.npmsec ? Search::RootPos.nodes_searched() : now() - start); }
|
||||
int elapsed() const { return int(Search::Limits.npmsec ? Threads.nodes_searched() : now() - startTime); }
|
||||
|
||||
int64_t availableNodes; // When in 'nodes as time' mode
|
||||
|
||||
private:
|
||||
TimePoint start;
|
||||
TimePoint startTime;
|
||||
int optimumTime;
|
||||
int maximumTime;
|
||||
double unstablePvFactor;
|
||||
|
|
|
@ -112,6 +112,8 @@ namespace {
|
|||
Search::LimitsType limits;
|
||||
string token;
|
||||
|
||||
limits.startTime = now(); // As early as possible!
|
||||
|
||||
while (is >> token)
|
||||
if (token == "searchmoves")
|
||||
while (is >> token)
|
||||
|
|
Loading…
Add table
Reference in a new issue