diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 1d724266..93b54ab2 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -116,6 +116,7 @@ bool use_draw_in_training_data_generation = false; bool use_draw_in_training = false; bool use_draw_in_validation = false; bool use_hash_in_training = true; +bool use_wdl = false; // ----------------------------------- // write phase file @@ -1025,6 +1026,16 @@ double sigmoid(double x) return 1.0 / (1.0 + std::exp(-x)); } +// A function that converts the evaluation value and game ply to an expected score [0,1] using the WDL win rate model (a draw counts as half a win) +double winning_percentage_wdl(Value value, int ply) +{ + double wdl_w = UCI::win_rate_model( value, ply); + double wdl_l = UCI::win_rate_model(-value, ply); + double wdl_d = 1000.0 - wdl_w - wdl_l; + + return (wdl_w + wdl_d / 2.0) / 1000.0; +} + // A function that converts the evaluation value to the winning rate [0,1] double winning_percentage(double value) { @@ -1033,6 +1044,18 @@ double winning_percentage(double value) // = sigmoid(Eval/4*ln(10)) return sigmoid(value / PawnValueEg / 4.0 * log(10.0)); } + +// A function that converts the evaluation value to the winning rate [0,1], using the WDL model when use_wdl is set and the sigmoid model otherwise +double winning_percentage(Value value, int ply) +{ + if (use_wdl) { + return winning_percentage_wdl(value, ply); + } + else { + return winning_percentage(value); + } +} + double dsigmoid(double x) { // Sigmoid function @@ -1069,8 +1092,8 @@ double calc_grad(Value deep, Value shallow, PackedSfenValue& psv) // Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad. // Therefore, it is not necessary to save it in memory. 
- double p = winning_percentage(deep); - double q = winning_percentage(shallow); + double p = winning_percentage(deep, psv.gamePly); + double q = winning_percentage(shallow, psv.gamePly); return (q - p) * dsigmoid(double(shallow) / 600.0); } #endif @@ -1095,8 +1118,8 @@ double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) // = ... // = q-p. - double p = winning_percentage(deep); - double q = winning_percentage(shallow); + double p = winning_percentage(deep, psv.gamePly); + double q = winning_percentage(shallow, psv.gamePly); return q - p; } @@ -1127,8 +1150,8 @@ double calc_grad(Value deep, Value shallow , const PackedSfenValue& psv) // elmo (WCSC27) method // Correct with the actual game wins and losses. - const double q = winning_percentage(shallow); - const double p = winning_percentage(deep); + const double q = winning_percentage(shallow, psv.gamePly); + const double p = winning_percentage(deep, psv.gamePly); // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw. // game_result = 1,0,-1 so add 1 and divide by 2. 
@@ -1150,8 +1173,8 @@ void calc_cross_entropy(Value deep, Value shallow, const PackedSfenValue& psv, double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy, double& entropy_eval, double& entropy_win, double& entropy) { - const double p /* teacher_winrate */ = winning_percentage(deep); - const double q /* eval_winrate */ = winning_percentage(shallow); + const double p /* teacher_winrate */ = winning_percentage(deep, psv.gamePly); + const double q /* eval_winrate */ = winning_percentage(shallow, psv.gamePly); const double t = double(psv.game_result + 1) / 2; constexpr double epsilon = 0.000001; @@ -2920,6 +2943,7 @@ void learn(Position&, istringstream& is) else if (option == "use_draw_in_training") is >> use_draw_in_training; else if (option == "use_draw_in_validation") is >> use_draw_in_validation; else if (option == "use_hash_in_training") is >> use_hash_in_training; + else if (option == "use_wdl") is >> use_wdl; // Discount rate else if (option == "discount_rate") is >> discount_rate; diff --git a/src/uci.cpp b/src/uci.cpp index a95a629d..8dd485b0 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -222,6 +222,12 @@ namespace { << "\nNodes/second : " << 1000 * nodes / elapsed << endl; } +// When you calculate check sum, save it and check the consistency later. + uint64_t eval_sum; +} // namespace + + +namespace UCI{ // The win rate model returns the probability (per mille) of winning given an eval // and a game-ply. The model fits rather accurately the LTC fishtest statistics. int win_rate_model(Value v, int ply) { @@ -243,10 +249,8 @@ namespace { // Return win rate in per mille (rounded to nearest) return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); } +} // namespace UCI -// When you calculate check sum, save it and check the consistency later. - uint64_t eval_sum; -} // namespace // Make is_ready_cmd() callable from outside. (Because I want to call it from the bench command etc.) // Note that the phase is not initialized. 
diff --git a/src/uci.h b/src/uci.h index 5073262e..8e12c856 100644 --- a/src/uci.h +++ b/src/uci.h @@ -74,6 +74,7 @@ std::string square(Square s); std::string move(Move m, bool chess960); std::string pv(const Position& pos, Depth depth, Value alpha, Value beta); std::string wdl(Value v, int ply); +int win_rate_model(Value v, int ply); Move to_move(const Position& pos, std::string& str); // Flag that read the evaluation function. This is set to false when evaldir is changed.