1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-07-11 19:49:14 +00:00

Implement proper stop signalling from root node

Previously, every node ran its search to completion under its own time management (TM), and the results were aggregated at the root node via a blocking MPI_Allreduce call. This seems to be problematic.

This commit implements a proper non-blocking signalling barrier, so that TM on the root node controls the cluster search, and disables TM on all non-root nodes.

Also includes a cosmetic fix to the nodes/NPS display.
This commit is contained in:
noobpwnftw 2018-07-11 09:09:48 +08:00 committed by Stéphane Nicolet
parent 3b7b632aa5
commit 8a95d269eb
6 changed files with 48 additions and 6 deletions

View file

@ -37,10 +37,13 @@ namespace Cluster {
static int world_rank = MPI_PROC_NULL;
static int world_size = 0;
static bool stop_signal = false;
static MPI_Request reqStop = MPI_REQUEST_NULL;
static MPI_Comm InputComm = MPI_COMM_NULL;
static MPI_Comm TTComm = MPI_COMM_NULL;
static MPI_Comm MoveComm = MPI_COMM_NULL;
static MPI_Comm StopComm = MPI_COMM_NULL;
static MPI_Datatype TTEntryDatatype = MPI_DATATYPE_NULL;
static std::vector<TTEntry> TTBuff;
@ -104,6 +107,7 @@ void init() {
MPI_Comm_dup(MPI_COMM_WORLD, &InputComm);
MPI_Comm_dup(MPI_COMM_WORLD, &TTComm);
MPI_Comm_dup(MPI_COMM_WORLD, &MoveComm);
MPI_Comm_dup(MPI_COMM_WORLD, &StopComm);
}
void finalize() {
@ -131,6 +135,32 @@ bool getline(std::istream& input, std::string& str) {
return state;
}
// Resets the cluster stop state before a new search begins.
void sync_start() {
  stop_signal = false;

  // The root joins the stop barrier later, from sync_stop(); it has nothing
  // to post here.
  if (is_root())
    return;

  // Every other rank enters the non-blocking barrier now and keeps the
  // request in reqStop so that sync_stop() can poll it.
  MPI_Ibarrier(StopComm, &reqStop);
}
// Propagates a stop between the root and the other ranks through the
// barrier on StopComm.
void sync_stop() {
  if (!is_root()) {
    // Non-root: poll (without blocking) the request held in reqStop and
    // raise the local stop flag once it reports completion.
    int barrierDone;
    MPI_Test(&reqStop, &barrierDone, MPI_STATUS_IGNORE);
    if (barrierDone)
      Threads.stop = true;
    return;
  }

  // Root: act only once, the first time a stop is observed.
  if (stop_signal || !Threads.stop)
    return;

  // Join the barrier to signal the cluster, then wait for it to complete.
  stop_signal = true;
  MPI_Ibarrier(StopComm, &reqStop);
  MPI_Wait(&reqStop, MPI_STATUS_IGNORE);
}
// Returns the value held in world_size.
int size() { return world_size; }

View file

@ -69,6 +69,8 @@ inline bool is_root() { return rank() == 0; }
void save(Thread* thread, TTEntry* tte,
Key k, Value v, Bound b, Depth d, Move m, Value ev);
void reduce_moves(MoveInfo& mi);
void sync_start();
void sync_stop();
#else
@ -86,6 +88,8 @@ inline void save(Thread* thread, TTEntry* tte,
tte->save(k, v, b, d, m, ev);
}
// Stubs for the #else side of the USE_MPI conditional: each cluster hook
// has an empty body, so calling it does nothing.
inline void reduce_moves(MoveInfo&) { }
inline void sync_start() { }
inline void sync_stop() { }
#endif /* USE_MPI */

View file

@ -27,7 +27,6 @@
#include "tt.h"
#include "uci.h"
#include "syzygy/tbprobe.h"
#include "cluster.h"
namespace PSQT {
void init();

View file

@ -234,12 +234,15 @@ void MainThread::search() {
Threads.stopOnPonderhit = true;
while (!Threads.stop && (Threads.ponder || Limits.infinite))
{} // Busy wait for a stop or a ponder reset
{ } // Busy wait for a stop or a ponder reset
// Stop the threads if not already stopped (also raise the stop if
// "ponderhit" just reset Threads.ponder).
Threads.stop = true;
// Finish any outstanding barriers.
Cluster::sync_stop();
// Wait until all threads have finished
for (Thread* th : Threads)
if (th != this)
@ -292,8 +295,8 @@ void MainThread::search() {
previousScore = static_cast<Value>(mi.score);
// Send again PV info if we have a new best thread
if (Cluster::is_root()) {
// Send again PV info if we have a new best thread
if (bestThread != this)
sync_cout << UCI::pv(bestThread->rootPos, bestThread->completedDepth, -VALUE_INFINITE, VALUE_INFINITE) << sync_endl;
@ -1608,6 +1611,9 @@ void MainThread::check_time() {
if (Threads.ponder)
return;
// Check if root has reached a stop barrier
Cluster::sync_stop();
if ( (Limits.use_time_management() && elapsed > Time.maximum() - 10)
|| (Limits.movetime && elapsed >= Limits.movetime)
|| (Limits.nodes && Threads.nodes_searched() >= (uint64_t)Limits.nodes))
@ -1653,8 +1659,8 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
if (!tb && i == pvIdx)
ss << (v >= beta ? " lowerbound" : v <= alpha ? " upperbound" : "");
ss << " nodes " << nodesSearched
<< " nps " << nodesSearched * 1000 / elapsed;
ss << " nodes " << nodesSearched * Cluster::size()
<< " nps " << nodesSearched * Cluster::size() * 1000 / elapsed;
if (elapsed > 1000) // Earlier makes little sense
ss << " hashfull " << TT.hashfull();

View file

@ -26,6 +26,7 @@
#include "misc.h"
#include "movepick.h"
#include "types.h"
#include "cluster.h"
class Position;
@ -89,7 +90,7 @@ struct LimitsType {
}
bool use_time_management() const {
return !(mate | movetime | depth | nodes | perft | infinite);
return Cluster::is_root() && !(mate | movetime | depth | nodes | perft | infinite);
}
std::vector<Move> searchmoves;

View file

@ -163,6 +163,8 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
main()->wait_for_search_finished();
stopOnPonderhit = stop = false;
Cluster::sync_start();
ponder = ponderMode;
Search::Limits = limits;
Search::RootMoves rootMoves;