From 9ce0ef3ac093bc139e55ced2d8121f1c29033f7b Mon Sep 17 00:00:00 2001 From: tttak Date: Fri, 3 Jul 2020 23:01:37 +0900 Subject: [PATCH 1/2] merge "Provide WDL statistics" https://github.com/official-stockfish/Stockfish/commit/110068808b51344ac59f8c6a0846f5dfdf670392 https://github.com/official-stockfish/Stockfish/pull/2778 https://github.com/official-stockfish/Stockfish/pull/2788 --- Readme.md | 5 +++++ src/search.cpp | 3 +++ src/uci.cpp | 39 +++++++++++++++++++++++++++++++++++++++ src/uci.h | 1 + src/ucioption.cpp | 1 + 5 files changed, 49 insertions(+) diff --git a/Readme.md b/Readme.md index 2b1de86b..e60ac718 100644 --- a/Readme.md +++ b/Readme.md @@ -66,6 +66,11 @@ Currently, Stockfish has the following UCI options: If enabled by UCI_LimitStrength, aim for an engine strength of the given Elo. This Elo rating has been calibrated at a time control of 60s+0.6s and anchored to CCRL 40/4. + * #### UCI_ShowWDL + If enabled, show approximate WDL statistics as part of the engine output. + These WDL numbers model expected game outcomes for a given evaluation and + game ply for engine self-play at fishtest LTC conditions (60+0.6s per game). + * #### Move Overhead Assume a time delay of x ms due to network and GUI overheads. This is useful to avoid losses on time in those cases. diff --git a/src/search.cpp b/src/search.cpp index 68f97fca..5990905f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1841,6 +1841,9 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { << " multipv " << i + 1 << " score " << UCI::value(v); + if (Options["UCI_ShowWDL"]) + ss << UCI::wdl(v, pos.game_ply()); + if (!tb && i == pvIdx) ss << (v >= beta ? " lowerbound" : v <= alpha ? " upperbound" : ""); diff --git a/src/uci.cpp b/src/uci.cpp index 13888d1a..a95a629d 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -19,6 +19,7 @@ */ #include +#include #include #include #include @@ -221,6 +222,28 @@ namespace { << "\nNodes/second : " << 1000 * nodes / elapsed << endl; } + // The win rate model returns the probability (per mille) of winning given an eval + // and a game-ply. The model fits rather accurately the LTC fishtest statistics. + int win_rate_model(Value v, int ply) { + + // The model captures only up to 240 plies, so limit input (and rescale) + double m = std::min(240, ply) / 64.0; + + // Coefficients of a 3rd order polynomial fit based on fishtest data + // for two parameters needed to transform eval to the argument of a + // logistic function. + double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679}; + double bs[] = {-3.37154371, 28.44489198, -56.67657741, 72.05858751}; + double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; + double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; + + // Transform eval to centipawns with limited range + double x = Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); + + // Return win rate in per mille (rounded to nearest) + return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); + } + // When you calculate check sum, save it and check the consistency later. uint64_t eval_sum; } // namespace @@ -437,6 +460,22 @@ string UCI::value(Value v) { } +/// UCI::wdl() report WDL statistics given an evaluation and a game ply, based on +/// data gathered for fishtest LTC games. + +string UCI::wdl(Value v, int ply) { + + stringstream ss; + + int wdl_w = win_rate_model( v, ply); + int wdl_l = win_rate_model(-v, ply); + int wdl_d = 1000 - wdl_w - wdl_l; + ss << " wdl " << wdl_w << " " << wdl_d << " " << wdl_l; + + return ss.str(); +} + + /// UCI::square() converts a Square to a string in algebraic notation (g1, a7, etc.) std::string UCI::square(Square s) { diff --git a/src/uci.h b/src/uci.h index d255db76..5073262e 100644 --- a/src/uci.h +++ b/src/uci.h @@ -73,6 +73,7 @@ std::string value(Value v); std::string square(Square s); std::string move(Move m, bool chess960); std::string pv(const Position& pos, Depth depth, Value alpha, Value beta); +std::string wdl(Value v, int ply); Move to_move(const Position& pos, std::string& str); // Flag that read the evaluation function. This is set to false when evaldir is changed. diff --git a/src/ucioption.cpp b/src/ucioption.cpp index c24884ce..d63caa9f 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -75,6 +75,7 @@ void init(OptionsMap& o) { o["UCI_AnalyseMode"] << Option(false); o["UCI_LimitStrength"] << Option(false); o["UCI_Elo"] << Option(1350, 1350, 2850); + o["UCI_ShowWDL"] << Option(false); o["SyzygyPath"] << Option("", on_tb_path); o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); From c964e902c594ec69ab5d82a380ca292ba28d551c Mon Sep 17 00:00:00 2001 From: tttak Date: Fri, 3 Jul 2020 23:21:49 +0900 Subject: [PATCH 2/2] use winning_percentage_wdl in learn --- src/learn/learner.cpp | 40 ++++++++++++++++++++++++++++++++-------- src/uci.cpp | 10 +++++++--- src/uci.h | 1 + 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 1d724266..93b54ab2 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -116,6 +116,7 @@ bool use_draw_in_training_data_generation = false; bool use_draw_in_training = false; bool use_draw_in_validation = false; bool use_hash_in_training = true; +bool use_wdl = false; // ----------------------------------- // write phase file @@ -1025,6 +1026,16 @@ double sigmoid(double x) return 1.0 / (1.0 + std::exp(-x)); } +// A function that converts the evaluation value to the winning rate [0,1] +double winning_percentage_wdl(Value value, int ply) +{ + double wdl_w = UCI::win_rate_model( value, ply); + double wdl_l = UCI::win_rate_model(-value, ply); + double wdl_d = 1000.0 - wdl_w - wdl_l; + + return (wdl_w + wdl_d / 2.0) / 1000.0; +} + // A function that converts the evaluation value to the winning rate [0,1] double winning_percentage(double value) { @@ -1033,6 +1044,18 @@ double winning_percentage(double value) // = sigmoid(Eval/4*ln(10)) return sigmoid(value / PawnValueEg / 4.0 * log(10.0)); } + +// A function that converts the evaluation value to the winning rate [0,1] +double winning_percentage(Value value, int ply) +{ + if (use_wdl) { + return winning_percentage_wdl(value, ply); + } + else { + return winning_percentage(value); + } +} + double dsigmoid(double x) { // Sigmoid function @@ -1069,8 +1092,8 @@ double calc_grad(Value deep, Value shallow, PackedSfenValue& psv) // Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad. // Therefore, it is not necessary to save it in memory. - double p = winning_percentage(deep); - double q = winning_percentage(shallow); + double p = winning_percentage(deep, psv.gamePly); + double q = winning_percentage(shallow, psv.gamePly); return (q - p) * dsigmoid(double(shallow) / 600.0); } #endif @@ -1095,8 +1118,8 @@ double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) // = ... // = q-p. - double p = winning_percentage(deep); - double q = winning_percentage(shallow); + double p = winning_percentage(deep, psv.gamePly); + double q = winning_percentage(shallow, psv.gamePly); return q - p; } @@ -1127,8 +1150,8 @@ double calc_grad(Value deep, Value shallow , const PackedSfenValue& psv) // elmo (WCSC27) method // Correct with the actual game wins and losses. - const double q = winning_percentage(shallow); - const double p = winning_percentage(deep); + const double q = winning_percentage(shallow, psv.gamePly); + const double p = winning_percentage(deep, psv.gamePly); // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw. // game_result = 1,0,-1 so add 1 and divide by 2. @@ -1150,8 +1173,8 @@ void calc_cross_entropy(Value deep, Value shallow, const PackedSfenValue& psv, double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy, double& entropy_eval, double& entropy_win, double& entropy) { - const double p /* teacher_winrate */ = winning_percentage(deep); - const double q /* eval_winrate */ = winning_percentage(shallow); + const double p /* teacher_winrate */ = winning_percentage(deep, psv.gamePly); + const double q /* eval_winrate */ = winning_percentage(shallow, psv.gamePly); const double t = double(psv.game_result + 1) / 2; constexpr double epsilon = 0.000001; @@ -2920,6 +2943,7 @@ void learn(Position&, istringstream& is) else if (option == "use_draw_in_training") is >> use_draw_in_training; else if (option == "use_draw_in_validation") is >> use_draw_in_validation; else if (option == "use_hash_in_training") is >> use_hash_in_training; + else if (option == "use_wdl") is >> use_wdl; // Discount rate else if (option == "discount_rate") is >> discount_rate; diff --git a/src/uci.cpp b/src/uci.cpp index a95a629d..8dd485b0 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -222,6 +222,12 @@ namespace { << "\nNodes/second : " << 1000 * nodes / elapsed << endl; } +// When you calculate check sum, save it and check the consistency later. + uint64_t eval_sum; +} // namespace + + +namespace UCI{ // The win rate model returns the probability (per mille) of winning given an eval // and a game-ply. The model fits rather accurately the LTC fishtest statistics. int win_rate_model(Value v, int ply) { @@ -243,10 +249,8 @@ namespace { // Return win rate in per mille (rounded to nearest) return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); } +} // namespace UCI -// When you calculate check sum, save it and check the consistency later. - uint64_t eval_sum; -} // namespace // Make is_ready_cmd() callable from outside. (Because I want to call it from the bench command etc.) // Note that the phase is not initialized. diff --git a/src/uci.h b/src/uci.h index 5073262e..8e12c856 100644 --- a/src/uci.h +++ b/src/uci.h @@ -74,6 +74,7 @@ std::string square(Square s); std::string move(Move m, bool chess960); std::string pv(const Position& pos, Depth depth, Value alpha, Value beta); std::string wdl(Value v, int ply); +int win_rate_model(Value v, int ply); Move to_move(const Position& pos, std::string& str); // Flag that read the evaluation function. This is set to false when evaldir is changed.