1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-29 16:23:09 +00:00
Start all threads searching on the root position and
use only the shared transposition table (TT) as the synchronization scheme.

It seems this scheme scales better than YBWC for
a high number of threads.

Verified for no regression at STC 3 threads
LLR: -2.95 (-2.94,2.94) [-3.00,1.00]
Total: 40232 W: 6908 L: 7130 D: 26194

Verified for no regression at LTC 3 threads
LLR: 2.95 (-2.94,2.94) [-3.00,1.00]
Total: 28186 W: 3908 L: 3798 D: 20480

Verified for no regression at STC 7 threads
LLR: 2.95 (-2.94,2.94) [-3.00,1.00]
Total: 3607 W: 674 L: 526 D: 2407

Verified for no regression at LTC 7 threads
LLR: 2.95 (-2.94,2.94) [-3.00,1.00]
Total: 4235 W: 671 L: 528 D: 3036

Tested with fixed games at LTC with 20 threads
ELO: 44.75 +-7.6 (95%) LOS: 100.0%
Total: 2069 W: 407 L: 142 D: 1520

Tested with fixed games at XLTC (120secs) with 20 threads
ELO: 28.01 +-6.7 (95%) LOS: 100.0%
Total: 2275 W: 349 L: 166 D: 1760

Original patch by mbootsector, with additional work
from Ivan Ivec (log formula), Joerg Oster (id loop
simplification) and Marco Costalba (assorted formatting
and rework).

Bench: 8116244
This commit is contained in:
mbootsector 2015-10-06 08:15:17 +02:00 committed by Marco Costalba
parent 7ea5659c5f
commit ecc5ff6693
10 changed files with 365 additions and 750 deletions

View file

@ -156,9 +156,10 @@ void benchmark(const Position& current, istream& is) {
else
{
Search::StateStackPtr st;
limits.startTime = now();
Threads.start_thinking(pos, limits, st);
Threads.main()->join();
nodes += Search::RootPos.nodes_searched();
nodes += Threads.nodes_searched();
}
}

View file

@ -238,8 +238,8 @@ void MovePicker::generate_next_stage() {
/// a new pseudo legal move every time it is called, until there are no more moves
/// left. It picks the move with the biggest value from a list of generated moves
/// taking care not to return the ttMove if it has already been searched.
template<>
Move MovePicker::next_move<false>() {
Move MovePicker::next_move() {
Move move;
@ -320,10 +320,3 @@ Move MovePicker::next_move<false>() {
}
}
}
/// Version of next_move() to use at split point nodes where the move is grabbed
/// from the split point's shared MovePicker object. This function is not thread
/// safe so must be lock protected by the caller.
template<>
Move MovePicker::next_move<true>() { return ss->splitPoint->movePicker->next_move<false>(); }

View file

@ -92,7 +92,7 @@ public:
MovePicker(const Position&, Move, const HistoryStats&, const CounterMovesHistoryStats&, Value);
MovePicker(const Position&, Move, Depth, const HistoryStats&, const CounterMovesHistoryStats&, Move, Search::Stack*);
template<bool SpNode> Move next_move();
Move next_move();
private:
template<GenType> void score();

File diff suppressed because it is too large Load diff

View file

@ -88,6 +88,7 @@ struct LimitsType {
std::vector<Move> searchmoves;
int time[COLOR_NB], inc[COLOR_NB], npmsec, movestogo, depth, movetime, mate, infinite, ponder;
int64_t nodes;
TimePoint startTime;
};
/// The SignalsType struct stores volatile flags updated during the search
@ -101,12 +102,9 @@ typedef std::unique_ptr<std::stack<StateInfo>> StateStackPtr;
extern volatile SignalsType Signals;
extern LimitsType Limits;
extern RootMoveVector RootMoves;
extern Position RootPos;
extern StateStackPtr SetupStates;
void init();
void think();
void reset();
template<bool Root> uint64_t perft(Position& pos, Depth depth);

View file

@ -66,15 +66,24 @@ void ThreadBase::notify_one() {
}
// ThreadBase::wait_for() set the thread to sleep until 'condition' turns true
// ThreadBase::wait() set the thread to sleep until 'condition' turns true
void ThreadBase::wait_for(volatile const bool& condition) {
void ThreadBase::wait(volatile const bool& condition) {
std::unique_lock<Mutex> lk(mutex);
sleepCondition.wait(lk, [&]{ return condition; });
}
// ThreadBase::wait_while() set the thread to sleep until 'condition' turns false
void ThreadBase::wait_while(volatile const bool& condition) {
std::unique_lock<Mutex> lk(mutex);
sleepCondition.wait(lk, [&]{ return !condition; });
}
// Thread c'tor makes some init but does not launch any execution thread that
// will be started only when c'tor returns.
@ -82,143 +91,10 @@ Thread::Thread() /* : splitPoints() */ { // Initialization of non POD broken in
searching = false;
maxPly = 0;
splitPointsSize = 0;
activeSplitPoint = nullptr;
activePosition = nullptr;
idx = Threads.size(); // Starts from 0
}
// Thread::cutoff_occurred() checks whether a beta cutoff has occurred in the
// current active split point, or in some ancestor of the split point.
bool Thread::cutoff_occurred() const {
for (SplitPoint* sp = activeSplitPoint; sp; sp = sp->parentSplitPoint)
if (sp->cutoff)
return true;
return false;
}
// Thread::can_join() checks whether the thread is available to join the split
// point 'sp'. An obvious requirement is that thread must be idle. With more than
// two threads, this is not sufficient: If the thread is the master of some split
// point, it is only available as a slave for the split points below his active
// one (the "helpful master" concept in YBWC terminology).
bool Thread::can_join(const SplitPoint* sp) const {
if (searching)
return false;
// Make a local copy to be sure it doesn't become zero under our feet while
// testing next condition and so leading to an out of bounds access.
const size_t size = splitPointsSize;
// No split points means that the thread is available as a slave for any
// other thread otherwise apply the "helpful master" concept if possible.
return !size || splitPoints[size - 1].slavesMask.test(sp->master->idx);
}
// Thread::split() does the actual work of distributing the work at a node between
// several available threads. If it does not succeed in splitting the node
// (because no idle threads are available), the function immediately returns.
// If splitting is possible, a SplitPoint object is initialized with all the
// data that must be copied to the helper threads and then helper threads are
// informed that they have been assigned work. This will cause them to instantly
// leave their idle loops and call search(). When all threads have returned from
// search() then split() returns.
void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bestValue,
Move* bestMove, Depth depth, int moveCount,
MovePicker* movePicker, int nodeType, bool cutNode) {
assert(searching);
assert(-VALUE_INFINITE < *bestValue && *bestValue <= alpha && alpha < beta && beta <= VALUE_INFINITE);
assert(depth >= Threads.minimumSplitDepth);
assert(splitPointsSize < MAX_SPLITPOINTS_PER_THREAD);
// Pick and init the next available split point
SplitPoint& sp = splitPoints[splitPointsSize];
sp.spinlock.acquire(); // No contention here until we don't increment splitPointsSize
sp.master = this;
sp.parentSplitPoint = activeSplitPoint;
sp.slavesMask = 0, sp.slavesMask.set(idx);
sp.depth = depth;
sp.bestValue = *bestValue;
sp.bestMove = *bestMove;
sp.alpha = alpha;
sp.beta = beta;
sp.nodeType = nodeType;
sp.cutNode = cutNode;
sp.movePicker = movePicker;
sp.moveCount = moveCount;
sp.pos = &pos;
sp.nodes = 0;
sp.cutoff = false;
sp.ss = ss;
sp.allSlavesSearching = true; // Must be set under lock protection
++splitPointsSize;
activeSplitPoint = &sp;
activePosition = nullptr;
// Try to allocate available threads
Thread* slave;
while ( sp.slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
&& (slave = Threads.available_slave(&sp)) != nullptr)
{
slave->spinlock.acquire();
if (slave->can_join(activeSplitPoint))
{
activeSplitPoint->slavesMask.set(slave->idx);
slave->activeSplitPoint = activeSplitPoint;
slave->searching = true;
}
slave->spinlock.release();
}
// Everything is set up. The master thread enters the idle loop, from which
// it will instantly launch a search, because its 'searching' flag is set.
// The thread will return from the idle loop when all slaves have finished
// their work at this split point.
sp.spinlock.release();
Thread::idle_loop(); // Force a call to base class idle_loop()
// In the helpful master concept, a master can help only a sub-tree of its
// split point and because everything is finished here, it's not possible
// for the master to be booked.
assert(!searching);
assert(!activePosition);
// We have returned from the idle loop, which means that all threads are
// finished. Note that decreasing splitPointsSize must be done under lock
// protection to avoid a race with Thread::can_join().
spinlock.acquire();
searching = true;
--splitPointsSize;
activeSplitPoint = sp.parentSplitPoint;
activePosition = &pos;
spinlock.release();
// Split point data cannot be changed now, so no need to lock protect
pos.set_nodes_searched(pos.nodes_searched() + sp.nodes);
*bestMove = sp.bestMove;
*bestValue = sp.bestValue;
}
// TimerThread::idle_loop() is where the timer thread waits Resolution milliseconds
// and then calls check_time(). When not searching, thread sleeps until it's woken up.
@ -233,12 +109,31 @@ void TimerThread::idle_loop() {
lk.unlock();
if (run)
if (!exit && run)
check_time();
}
}
// Thread::idle_loop() is where the thread is parked when it has no work to do
void Thread::idle_loop() {
while (!exit)
{
std::unique_lock<Mutex> lk(mutex);
while (!searching && !exit)
sleepCondition.wait(lk);
lk.unlock();
if (!exit && searching)
search();
}
}
// MainThread::idle_loop() is where the main thread is parked waiting to be started
// when there is a new search. The main thread will launch all the slave threads.
@ -259,20 +154,12 @@ void MainThread::idle_loop() {
lk.unlock();
if (!exit)
{
searching = true;
Search::think();
assert(searching);
searching = false;
}
think();
}
}
// MainThread::join() waits for main thread to finish the search
// MainThread::join() waits for main thread to finish thinking
void MainThread::join() {
@ -317,7 +204,6 @@ void ThreadPool::exit() {
void ThreadPool::read_uci_options() {
minimumSplitDepth = Options["Min Split Depth"] * ONE_PLY;
size_t requested = Options["Threads"];
assert(requested > 0);
@ -333,16 +219,14 @@ void ThreadPool::read_uci_options() {
}
// ThreadPool::available_slave() tries to find an idle thread which is available
// to join SplitPoint 'sp'.
// ThreadPool::nodes_searched() returns the number of nodes searched
Thread* ThreadPool::available_slave(const SplitPoint* sp) const {
int64_t ThreadPool::nodes_searched() {
for (Thread* th : *this)
if (th->can_join(sp))
return th;
return nullptr;
int64_t nodes = 0;
for (Thread *th : *this)
nodes += th->rootPos.nodes_searched();
return nodes;
}
@ -356,8 +240,8 @@ void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits,
Signals.stopOnPonderhit = Signals.firstRootMove = false;
Signals.stop = Signals.failedLowAtRoot = false;
RootMoves.clear();
RootPos = pos;
main()->rootMoves.clear();
main()->rootPos = pos;
Limits = limits;
if (states.get()) // If we don't set a new position, preserve current state
{
@ -368,7 +252,7 @@ void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits,
for (const auto& m : MoveList<LEGAL>(pos))
if ( limits.searchmoves.empty()
|| std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
RootMoves.push_back(RootMove(m));
main()->rootMoves.push_back(RootMove(m));
main()->thinking = true;
main()->notify_one(); // Wake up main thread: 'thinking' must be already set

View file

@ -37,53 +37,6 @@
struct Thread;
const size_t MAX_THREADS = 128;
const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
const size_t MAX_SLAVES_PER_SPLITPOINT = 4;
class Spinlock {
std::atomic_int lock;
public:
Spinlock() { lock = 1; } // Init here to workaround a bug with MSVC 2013
void acquire() {
while (lock.fetch_sub(1, std::memory_order_acquire) != 1)
while (lock.load(std::memory_order_relaxed) <= 0)
std::this_thread::yield(); // Be nice to hyperthreading
}
void release() { lock.store(1, std::memory_order_release); }
};
/// SplitPoint struct stores information shared by the threads searching in
/// parallel below the same split point. It is populated at splitting time.
struct SplitPoint {
// Const data after split point has been setup
const Position* pos;
Search::Stack* ss;
Thread* master;
Depth depth;
Value beta;
int nodeType;
bool cutNode;
// Const pointers to shared data
MovePicker* movePicker;
SplitPoint* parentSplitPoint;
// Shared variable data
Spinlock spinlock;
std::bitset<MAX_THREADS> slavesMask;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
volatile Value alpha;
volatile Value bestValue;
volatile Move bestMove;
volatile int moveCount;
volatile bool cutoff;
};
/// ThreadBase struct is the base of the hierarchy from where we derive all the
@ -94,10 +47,10 @@ struct ThreadBase : public std::thread {
virtual ~ThreadBase() = default;
virtual void idle_loop() = 0;
void notify_one();
void wait_for(volatile const bool& b);
void wait(volatile const bool& b);
void wait_while(volatile const bool& b);
Mutex mutex;
Spinlock spinlock;
ConditionVariable sleepCondition;
volatile bool exit = false;
};
@ -112,22 +65,21 @@ struct Thread : public ThreadBase {
Thread();
virtual void idle_loop();
bool cutoff_occurred() const;
bool can_join(const SplitPoint* sp) const;
void search(bool isMainThread = false);
void split(Position& pos, Search::Stack* ss, Value alpha, Value beta, Value* bestValue, Move* bestMove,
Depth depth, int moveCount, MovePicker* movePicker, int nodeType, bool cutNode);
SplitPoint splitPoints[MAX_SPLITPOINTS_PER_THREAD];
Pawns::Table pawnsTable;
Material::Table materialTable;
Endgames endgames;
Position* activePosition;
size_t idx;
size_t idx, PVIdx;
int maxPly;
SplitPoint* volatile activeSplitPoint;
volatile size_t splitPointsSize;
volatile bool searching;
Position rootPos;
Search::RootMoveVector rootMoves;
Search::Stack stack[MAX_PLY+4];
HistoryStats History;
MovesStats Countermoves;
Depth depth;
};
@ -137,6 +89,7 @@ struct Thread : public ThreadBase {
struct MainThread : public Thread {
virtual void idle_loop();
void join();
void think();
volatile bool thinking = true; // Avoid a race with start_thinking()
};
@ -161,10 +114,8 @@ struct ThreadPool : public std::vector<Thread*> {
MainThread* main() { return static_cast<MainThread*>(at(0)); }
void read_uci_options();
Thread* available_slave(const SplitPoint* sp) const;
void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);
Depth minimumSplitDepth;
int64_t nodes_searched();
TimerThread* timer;
};

View file

@ -80,7 +80,7 @@ namespace {
/// inc > 0 && movestogo == 0 means: x basetime + z increment
/// inc > 0 && movestogo != 0 means: x moves in y minutes + z increment
void TimeManagement::init(Search::LimitsType& limits, Color us, int ply, TimePoint now)
void TimeManagement::init(Search::LimitsType& limits, Color us, int ply)
{
int minThinkingTime = Options["Minimum Thinking Time"];
int moveOverhead = Options["Move Overhead"];
@ -102,7 +102,7 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply, TimePoi
limits.npmsec = npmsec;
}
start = now;
startTime = limits.startTime;
unstablePvFactor = 1;
optimumTime = maximumTime = std::max(limits.time[us], minThinkingTime);

View file

@ -22,22 +22,23 @@
#include "misc.h"
#include "search.h"
#include "thread.h"
/// The TimeManagement class computes the optimal time to think depending on
/// the maximum available time, the game move number and other parameters.
class TimeManagement {
public:
void init(Search::LimitsType& limits, Color us, int ply, TimePoint now);
void init(Search::LimitsType& limits, Color us, int ply);
void pv_instability(double bestMoveChanges) { unstablePvFactor = 1 + bestMoveChanges; }
int available() const { return int(optimumTime * unstablePvFactor * 0.76); }
int maximum() const { return maximumTime; }
int elapsed() const { return int(Search::Limits.npmsec ? Search::RootPos.nodes_searched() : now() - start); }
int elapsed() const { return int(Search::Limits.npmsec ? Threads.nodes_searched() : now() - startTime); }
int64_t availableNodes; // When in 'nodes as time' mode
private:
TimePoint start;
TimePoint startTime;
int optimumTime;
int maximumTime;
double unstablePvFactor;

View file

@ -112,6 +112,8 @@ namespace {
Search::LimitsType limits;
string token;
limits.startTime = now(); // As early as possible!
while (is >> token)
if (token == "searchmoves")
while (is >> token)