From 9ce0ef3ac093bc139e55ced2d8121f1c29033f7b Mon Sep 17 00:00:00 2001
From: tttak <tttak@users.noreply.github.com>
Date: Fri, 3 Jul 2020 23:01:37 +0900
Subject: [PATCH 1/2] merge "Provide WDL statistics"

https://github.com/official-stockfish/Stockfish/commit/110068808b51344ac59f8c6a0846f5dfdf670392
https://github.com/official-stockfish/Stockfish/pull/2778
https://github.com/official-stockfish/Stockfish/pull/2788
---
 Readme.md         |  5 +++++
 src/search.cpp    |  3 +++
 src/uci.cpp       | 39 +++++++++++++++++++++++++++++++++++++++
 src/uci.h         |  1 +
 src/ucioption.cpp |  1 +
 5 files changed, 49 insertions(+)
diff --git a/Readme.md b/Readme.md
index 2b1de86b..e60ac718 100644
--- a/Readme.md
+++ b/Readme.md
@@ -66,6 +66,11 @@ Currently, Stockfish has the following UCI options:
     If enabled by UCI_LimitStrength, aim for an engine strength of the given Elo.
     This Elo rating has been calibrated at a time control of 60s+0.6s and anchored to CCRL 40/4.
 
+  * #### UCI_ShowWDL
+    If enabled, show approximate WDL statistics as part of the engine output.
+    These WDL numbers model expected game outcomes for a given evaluation and
+    game ply for engine self-play at fishtest LTC conditions (60+0.6s per game).
+
   * #### Move Overhead
     Assume a time delay of x ms due to network and GUI overheads. This is useful to
     avoid losses on time in those cases.
diff --git a/src/search.cpp b/src/search.cpp
index 68f97fca..5990905f 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -1841,6 +1841,9 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
          << " multipv "  << i + 1
          << " score "    << UCI::value(v);
 
+      if (Options["UCI_ShowWDL"])
+          ss << UCI::wdl(v, pos.game_ply());
+
       if (!tb && i == pvIdx)
           ss << (v >= beta ? " lowerbound" : v <= alpha ? " upperbound" : "");
 
diff --git a/src/uci.cpp b/src/uci.cpp
index 13888d1a..a95a629d 100644
--- a/src/uci.cpp
+++ b/src/uci.cpp
@@ -19,6 +19,7 @@
 */
 
 #include <cassert>
+#include <cmath>
 #include <iostream>
 #include <sstream>
 #include <string>
@@ -221,6 +222,28 @@ namespace {
          << "\nNodes/second    : " << 1000 * nodes / elapsed << endl;
   }
 
+  // The win rate model returns the probability (per mille) of winning given an eval
+  // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
+  int win_rate_model(Value v, int ply) {
+
+     // The model captures only up to 240 plies, so cap ply at 240, then rescale by 1/64
+     double m = std::min(240, ply) / 64.0;
+
+     // Coefficients of a 3rd order polynomial fit based on fishtest data
+     // for two parameters needed to transform eval to the argument of a
+     // logistic function.
+     double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679};
+     double bs[] = {-3.37154371, 28.44489198, -56.67657741,  72.05858751};
+     double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
+     double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
+
+     // Transform eval to centipawns (100 * v / PawnValueEg), clamped to [-1000, 1000]
+     double x = Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
+
+     // Return win rate in per mille (rounded to nearest)
+     return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
+  }
+
 // When you calculate check sum, save it and check the consistency later.
   uint64_t eval_sum;
 } // namespace
@@ -437,6 +460,22 @@ string UCI::value(Value v) {
 }
 
 
+/// UCI::wdl() reports WDL statistics given an evaluation and a game ply, based on
+/// data gathered for fishtest LTC games. Output is " wdl <win> <draw> <loss>" in per mille.
+
+string UCI::wdl(Value v, int ply) {
+
+  stringstream ss;
+
+  int wdl_w = win_rate_model( v, ply);
+  int wdl_l = win_rate_model(-v, ply);
+  int wdl_d = 1000 - wdl_w - wdl_l;
+  ss << " wdl " << wdl_w << " " << wdl_d << " " << wdl_l;
+
+  return ss.str();
+}
+
+
 /// UCI::square() converts a Square to a string in algebraic notation (g1, a7, etc.)
 
 std::string UCI::square(Square s) {
diff --git a/src/uci.h b/src/uci.h
index d255db76..5073262e 100644
--- a/src/uci.h
+++ b/src/uci.h
@@ -73,6 +73,7 @@ std::string value(Value v);
 std::string square(Square s);
 std::string move(Move m, bool chess960);
 std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
+std::string wdl(Value v, int ply);
 Move to_move(const Position& pos, std::string& str);
 
 // Flag that read the evaluation function. This is set to false when evaldir is changed.
diff --git a/src/ucioption.cpp b/src/ucioption.cpp
index c24884ce..d63caa9f 100644
--- a/src/ucioption.cpp
+++ b/src/ucioption.cpp
@@ -75,6 +75,7 @@ void init(OptionsMap& o) {
   o["UCI_AnalyseMode"]       << Option(false);
   o["UCI_LimitStrength"]     << Option(false);
   o["UCI_Elo"]               << Option(1350, 1350, 2850);
+  o["UCI_ShowWDL"]           << Option(false);
   o["SyzygyPath"]            << Option("<empty>", on_tb_path);
   o["SyzygyProbeDepth"]      << Option(1, 1, 100);
   o["Syzygy50MoveRule"]      << Option(true);

From c964e902c594ec69ab5d82a380ca292ba28d551c Mon Sep 17 00:00:00 2001
From: tttak <tttak@users.noreply.github.com>
Date: Fri, 3 Jul 2020 23:21:49 +0900
Subject: [PATCH 2/2] use winning_percentage_wdl in learn

---
 src/learn/learner.cpp | 40 ++++++++++++++++++++++++++++++++--------
 src/uci.cpp           | 10 +++++++---
 src/uci.h             |  1 +
 3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index 1d724266..93b54ab2 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -116,6 +116,7 @@ bool use_draw_in_training_data_generation = false;
 bool use_draw_in_training = false;
 bool use_draw_in_validation = false;
 bool use_hash_in_training = true;
+bool use_wdl = false;
 
 // -----------------------------------
 // write phase file
@@ -1025,6 +1026,16 @@ double sigmoid(double x)
 	return 1.0 / (1.0 + std::exp(-x));
 }
 
+// Converts an evaluation value and game ply to an expected score in [0,1] via the WDL model: (win + draw / 2) / 1000
+double winning_percentage_wdl(Value value, int ply)
+{
+	double wdl_w = UCI::win_rate_model( value, ply);
+	double wdl_l = UCI::win_rate_model(-value, ply);
+	double wdl_d = 1000.0 - wdl_w - wdl_l;
+
+	return (wdl_w + wdl_d / 2.0) / 1000.0;
+}
+
 // A function that converts the evaluation value to the winning rate [0,1]
 double winning_percentage(double value)
 {
@@ -1033,6 +1044,18 @@ double winning_percentage(double value)
 	// = sigmoid(Eval/4*ln(10))
 	return sigmoid(value / PawnValueEg / 4.0 * log(10.0));
 }
+
+// Converts the evaluation value to the winning rate [0,1]: WDL model if use_wdl is set, otherwise the ply-independent sigmoid
+double winning_percentage(Value value, int ply)
+{
+	if (use_wdl) {
+		return winning_percentage_wdl(value, ply);
+	}
+	else {
+		return winning_percentage(value);
+	}
+}
+
 double dsigmoid(double x)
 {
 	// Sigmoid function
@@ -1069,8 +1092,8 @@ double calc_grad(Value deep, Value shallow, PackedSfenValue& psv)
 	// Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad.
 	// Therefore, it is not necessary to save it in memory.
 
-	double p = winning_percentage(deep);
-	double q = winning_percentage(shallow);
+	double p = winning_percentage(deep, psv.gamePly);
+	double q = winning_percentage(shallow, psv.gamePly);
 	return (q - p) * dsigmoid(double(shallow) / 600.0);
 }
 #endif
@@ -1095,8 +1118,8 @@ double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv)
 	// = ...
 	// = q-p.
 
-	double p = winning_percentage(deep);
-	double q = winning_percentage(shallow);
+	double p = winning_percentage(deep, psv.gamePly);
+	double q = winning_percentage(shallow, psv.gamePly);
 
 	return q - p;
 }
@@ -1127,8 +1150,8 @@ double calc_grad(Value deep, Value shallow , const PackedSfenValue& psv)
 	// elmo (WCSC27) method
 	// Correct with the actual game wins and losses.
 
-	const double q = winning_percentage(shallow);
-	const double p = winning_percentage(deep);
+	const double q = winning_percentage(shallow, psv.gamePly);
+	const double p = winning_percentage(deep, psv.gamePly);
 
 	// Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
 	// game_result = 1,0,-1 so add 1 and divide by 2.
@@ -1150,8 +1173,8 @@ void calc_cross_entropy(Value deep, Value shallow, const PackedSfenValue& psv,
 	double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy,
 	double& entropy_eval, double& entropy_win, double& entropy)
 {
-	const double p /* teacher_winrate */ = winning_percentage(deep);
-	const double q /* eval_winrate    */ = winning_percentage(shallow);
+	const double p /* teacher_winrate */ = winning_percentage(deep, psv.gamePly);
+	const double q /* eval_winrate    */ = winning_percentage(shallow, psv.gamePly);
 	const double t = double(psv.game_result + 1) / 2;
 
 	constexpr double epsilon = 0.000001;
@@ -2920,6 +2943,7 @@ void learn(Position&, istringstream& is)
 		else if (option == "use_draw_in_training") is >> use_draw_in_training;
 		else if (option == "use_draw_in_validation") is >> use_draw_in_validation;
 		else if (option == "use_hash_in_training") is >> use_hash_in_training;
+		else if (option == "use_wdl") is >> use_wdl;
 		// Discount rate
 		else if (option == "discount_rate") is >> discount_rate;
 
diff --git a/src/uci.cpp b/src/uci.cpp
index a95a629d..8dd485b0 100644
--- a/src/uci.cpp
+++ b/src/uci.cpp
@@ -222,6 +222,12 @@ namespace {
          << "\nNodes/second    : " << 1000 * nodes / elapsed << endl;
   }
 
+// When you calculate check sum, save it and check the consistency later.
+  uint64_t eval_sum;
+} // namespace
+
+
+namespace UCI{
   // The win rate model returns the probability (per mille) of winning given an eval
   // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
   int win_rate_model(Value v, int ply) {
@@ -243,10 +249,8 @@ namespace {
      // Return win rate in per mille (rounded to nearest)
      return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
   }
+} // namespace UCI
 
-// When you calculate check sum, save it and check the consistency later.
-  uint64_t eval_sum;
-} // namespace
 
 // Make is_ready_cmd() callable from outside. (Because I want to call it from the bench command etc.)
 // Note that the phase is not initialized.
diff --git a/src/uci.h b/src/uci.h
index 5073262e..8e12c856 100644
--- a/src/uci.h
+++ b/src/uci.h
@@ -74,6 +74,7 @@ std::string square(Square s);
 std::string move(Move m, bool chess960);
 std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
 std::string wdl(Value v, int ply);
+int win_rate_model(Value v, int ply);
 Move to_move(const Position& pos, std::string& str);
 
 // Flag that read the evaluation function. This is set to false when evaldir is changed.