use winning_percentage_wdl in learn

2025-07-12 03:59:15 +00:00 · 2020-07-03 23:21:49 +09:00 · 2020-07-03 23:21:49 +09:00 · c964e902c5
commit c964e902c5
parent 9ce0ef3ac0
3 changed files with 40 additions and 11 deletions
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@ -116,6 +116,7 @@ bool use_draw_in_training_data_generation = false;
 bool use_draw_in_training = false;
 bool use_draw_in_validation = false;
 bool use_hash_in_training = true;
+bool use_wdl = false;

 // -----------------------------------
 // write phase file
@ -1025,6 +1026,16 @@ double sigmoid(double x)
 	return 1.0 / (1.0 + std::exp(-x));
 }

+// A function that converts the evaluation value to the winning rate [0,1]
+double winning_percentage_wdl(Value value, int ply)
+{
+	double wdl_w = UCI::win_rate_model( value, ply);
+	double wdl_l = UCI::win_rate_model(-value, ply);
+	double wdl_d = 1000.0 - wdl_w - wdl_l;
+
+	return (wdl_w + wdl_d / 2.0) / 1000.0;
+}
+
 // A function that converts the evaluation value to the winning rate [0,1]
 double winning_percentage(double value)
 {
@ -1033,6 +1044,18 @@ double winning_percentage(double value)
 	// = sigmoid(Eval/4*ln(10))
 	return sigmoid(value / PawnValueEg / 4.0 * log(10.0));
 }
+
+// A function that converts the evaluation value to the winning rate [0,1]
+double winning_percentage(Value value, int ply)
+{
+	if (use_wdl) {
+		return winning_percentage_wdl(value, ply);
+	}
+	else {
+		return winning_percentage(value);
+	}
+}
+
 double dsigmoid(double x)
 {
 	// Sigmoid function
@ -1069,8 +1092,8 @@ double calc_grad(Value deep, Value shallow, PackedSfenValue& psv)
 	// Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad.
 	// Therefore, it is not necessary to save it in memory.

-	double p = winning_percentage(deep);
-	double q = winning_percentage(shallow);
+	double p = winning_percentage(deep, psv.gamePly);
+	double q = winning_percentage(shallow, psv.gamePly);
 	return (q - p) * dsigmoid(double(shallow) / 600.0);
 }
 #endif
@ -1095,8 +1118,8 @@ double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv)
 	// = ...
 	// = q-p.

-	double p = winning_percentage(deep);
-	double q = winning_percentage(shallow);
+	double p = winning_percentage(deep, psv.gamePly);
+	double q = winning_percentage(shallow, psv.gamePly);

 	return q - p;
 }
@ -1127,8 +1150,8 @@ double calc_grad(Value deep, Value shallow , const PackedSfenValue& psv)
 	// elmo (WCSC27) method
 	// Correct with the actual game wins and losses.

-	const double q = winning_percentage(shallow);
-	const double p = winning_percentage(deep);
+	const double q = winning_percentage(shallow, psv.gamePly);
+	const double p = winning_percentage(deep, psv.gamePly);

 	// Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
 	// game_result = 1,0,-1 so add 1 and divide by 2.
@ -1150,8 +1173,8 @@ void calc_cross_entropy(Value deep, Value shallow, const PackedSfenValue& psv,
 	double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy,
 	double& entropy_eval, double& entropy_win, double& entropy)
 {
-	const double p /* teacher_winrate */ = winning_percentage(deep);
-	const double q /* eval_winrate    */ = winning_percentage(shallow);
+	const double p /* teacher_winrate */ = winning_percentage(deep, psv.gamePly);
+	const double q /* eval_winrate    */ = winning_percentage(shallow, psv.gamePly);
 	const double t = double(psv.game_result + 1) / 2;

 	constexpr double epsilon = 0.000001;
@ -2920,6 +2943,7 @@ void learn(Position&, istringstream& is)
 		else if (option == "use_draw_in_training") is >> use_draw_in_training;
 		else if (option == "use_draw_in_validation") is >> use_draw_in_validation;
 		else if (option == "use_hash_in_training") is >> use_hash_in_training;
+		else if (option == "use_wdl") is >> use_wdl;
 		// Discount rate
 		else if (option == "discount_rate") is >> discount_rate;

--- a/src/uci.cpp
+++ b/src/uci.cpp
@ -222,6 +222,12 @@ namespace {
         << "\nNodes/second    : " << 1000 * nodes / elapsed << endl;
  }

+// When you calculate check sum, save it and check the consistency later.
+  uint64_t eval_sum;
+} // namespace
+
+
+namespace UCI{
  // The win rate model returns the probability (per mille) of winning given an eval
  // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
  int win_rate_model(Value v, int ply) {
@ -243,10 +249,8 @@ namespace {
     // Return win rate in per mille (rounded to nearest)
     return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
  }
+} // namespace UCI

-// When you calculate check sum, save it and check the consistency later.
-  uint64_t eval_sum;
-} // namespace

 // Make is_ready_cmd() callable from outside. (Because I want to call it from the bench command etc.)
 // Note that the phase is not initialized.
--- a/src/uci.h
+++ b/src/uci.h
@ -74,6 +74,7 @@ std::string square(Square s);
 std::string move(Move m, bool chess960);
 std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
 std::string wdl(Value v, int ply);
+int win_rate_model(Value v, int ply);
 Move to_move(const Position& pos, std::string& str);

 // Flag that read the evaluation function. This is set to false when evaldir is changed.