mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00
Update the WDL model
Update the internal WDL model. After the dual net merge, the internal
evaluations have drifted upwards a bit. With this PR
`NormalizeToPawnValue` changes from `328` to `345`.
The new model was fitted based on about 200M positions extracted from
3.4M fishtest LTC games from the last two weeks, involving SF versions
from 6deb88728f
to current master.
Apart from the WDL model parameter update, this PR implements the
following changes:
WDL Model:
- an incorrect 8-move shift in master's WDL model has been fixed
- the polynomials `p_a` and `p_b` are fitted over the move range [8, 120]
- the coefficients for `p_a` and `p_b` are optimized by maximizing the
probability of predicting the observed outcome (credits to @vondele)
SF code:
- for wdl values, move will be clamped to `max(8, min(120, move))`
- no longer clamp the internal eval to [-4000,4000]
- compute `NormalizeToPawnValue` with `round`, not `trunc`
The PR only affects displayed `cp` and `wdl` values.
closes https://github.com/official-stockfish/Stockfish/pull/5002
No functional change
This commit is contained in:
parent
ad9fcbc496
commit
a901474bf9
1 changed file with 9 additions and 12 deletions
21
src/uci.cpp
21
src/uci.cpp
|
@ -43,7 +43,7 @@
|
|||
namespace Stockfish {
|
||||
|
||||
constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
|
||||
constexpr int NormalizeToPawnValue = 328;
|
||||
constexpr int NormalizeToPawnValue = 345;
|
||||
constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;
|
||||
|
||||
UCI::UCI(int argc, char** argv) :
|
||||
|
@ -421,26 +421,23 @@ namespace {
|
|||
// eval and a game ply. It fits the LTC fishtest statistics rather accurately.
|
||||
int win_rate_model(Value v, int ply) {
|
||||
|
||||
// The model only captures up to 240 plies, so limit the input and then rescale
|
||||
double m = std::min(240, ply) / 64.0;
|
||||
// The fitted model only uses data for moves in [8, 120], and is anchored at move 32.
|
||||
double m = std::clamp(ply / 2 + 1, 8, 120) / 32.0;
|
||||
|
||||
// The coefficients of a third-order polynomial fit are based on the fishtest data
|
||||
// for two parameters that need to transform eval to the argument of a logistic
|
||||
// function.
|
||||
constexpr double as[] = {0.38036525, -2.82015070, 23.17882135, 307.36768407};
|
||||
constexpr double bs[] = {-2.29434733, 13.27689788, -14.26828904, 63.45318330};
|
||||
constexpr double as[] = {-2.00568292, 10.45906746, 1.67438883, 334.45864705};
|
||||
constexpr double bs[] = {-4.97134419, 36.15096345, -82.25513499, 117.35186805};
|
||||
|
||||
// Enforce that NormalizeToPawnValue corresponds to a 50% win rate at ply 64
|
||||
static_assert(NormalizeToPawnValue == int(as[0] + as[1] + as[2] + as[3]));
|
||||
// Enforce that NormalizeToPawnValue corresponds to a 50% win rate at move 32.
|
||||
static_assert(NormalizeToPawnValue == int(0.5 + as[0] + as[1] + as[2] + as[3]));
|
||||
|
||||
double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
|
||||
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
|
||||
|
||||
// Transform the eval to centipawns with limited range
|
||||
double x = std::clamp(double(v), -4000.0, 4000.0);
|
||||
|
||||
// Return the win rate in per mille units, rounded to the nearest integer
|
||||
return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
|
||||
// Return the win rate in per mille units, rounded to the nearest integer.
|
||||
return int(0.5 + 1000 / (1 + std::exp((a - double(v)) / b)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue