From e229015127df8264fc23cdc594e906aebb429096 Mon Sep 17 00:00:00 2001
From: tttak
Date: Sat, 27 Jun 2020 15:24:20 +0900
Subject: [PATCH] =?UTF-8?q?learn=20convert=5Fbin=5Ffrom=5Fpgn-extract?=
 =?UTF-8?q?=E3=82=B3=E3=83=9E=E3=83=B3=E3=83=89=E3=82=92=E8=BF=BD=E5=8A=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

http://rebel13.nl/download/data.html
Download Selected Lichess games

pgn-extract --fencomments -Wlalg --nochecks --nomovenumbers --noresults -w500000 -N -V -o comp-2019-06.txt comp-2019-06.pgn

stockfish.exe
setoption name SkipLoadingEval value true
isready
learn convert_bin_from_pgn-extract output_file_name fens_comp-2019-06.bin comp-2019-06.txt
---
 Note: the dump this patch was recovered from had lost its line breaks,
 indentation and the contents of angle brackets. The header names on the two
 context #include lines could not be recovered and are left as found.

 src/learn/learner.cpp | 219 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 218 insertions(+), 1 deletion(-)

diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index 98a310c4..0567af76 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -17,6 +17,7 @@
 
 #include
 #include
+#include <regex>
 
 #include "learn.h"
 #include "multi_think.h"
@@ -2494,7 +2495,213 @@ void convert_bin(const vector<string>& filenames, const string& output_file_name
 	std::cout << "all done" << std::endl;
 	fs.close();
 }
-	
+// Removes leading whitespace from s in place.
+static inline void ltrim(std::string &s) {
+  // std::isspace is undefined for negative arguments other than EOF, so the
+  // character is taken as unsigned char rather than int.
+  s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
+    return !std::isspace(ch);
+  }));
+}
+
+// Removes trailing whitespace from s in place.
+static inline void rtrim(std::string &s) {
+  s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) {
+    return !std::isspace(ch);
+  }).base(), s.end());
+}
+
+// Removes leading and trailing whitespace from s in place.
+static inline void trim(std::string &s) {
+  ltrim(s);
+  rtrim(s);
+}
+
+// Maps the quoted value of a [Result ...] tag to 1 (White win), -1 (Black win)
+// or 0 (anything else, e.g. a draw).
+int parse_game_result_from_pgn_extract(std::string result) {
+  // White Win
+  if (result == "\"1-0\"") {
+    return 1;
+  }
+  // Black Win
+  if (result == "\"0-1\"") {
+    return -1;
+  }
+  // Draw
+  return 0;
+}
+
+// Converts the text of an [%eval ...] annotation to a Value.
+//   0.25 --> 25
+//   #-4  --> -mate_in(4)
+//   #3   --> mate_in(3)
+// std::stoi / std::stod throw if the text is not a number.
+Value parse_score_from_pgn_extract(std::string eval) {
+  if (eval.substr(0, 1) == "#") {
+    if (eval.substr(1, 1) == "-") {
+      return -mate_in(stoi(eval.substr(2)));
+    }
+    return mate_in(stoi(eval.substr(1)));
+  }
+
+  // Round to the nearest centipawn. Truncating turned e.g. 0.29 into 28,
+  // because 0.29 * 100 is 28.999... in floating point.
+  const double centipawns = stod(eval) * 100.0;
+  return Value(static_cast<int>(centipawns < 0 ? centipawns - 0.5 : centipawns + 0.5));
+}
+
+// Converts teacher data in pgn-extract format to PackedSfenValue records for
+// YaneuraOu and writes them to output_file_name.
+//
+// Input lines starting with '[' are tag lines (only Result is used); every
+// other non-empty line is parsed as move text of the form
+//   move { [%eval 0.25] [%clk 0:10:00] } { fen } move { ... } { fen } ...
+// One record is written per comment that carries an [%eval] and is followed
+// by a FEN comment and a further move.
+void convert_bin_from_pgn_extract(const vector<string>& filenames, const string& output_file_name)
+{
+  auto th = Threads.main();
+  auto &pos = th->rootPos;
+
+  std::fstream ofs;
+  ofs.open(output_file_name, ios::out | ios::binary);
+  if (!ofs) {
+    std::cout << "Error! : cannot open " << output_file_name << std::endl;
+    return;
+  }
+
+  // std::regex objects are costly to construct, so each pattern is compiled
+  // once here instead of once per tag line / per move.
+  const std::regex pattern_result(R"(\[Result (.+?)\])");
+  const std::regex pattern_bracket(R"(\{(.+?)\})");
+  const std::regex pattern_eval(R"(\[\%eval (.+?)\])");
+  const std::regex pattern_move(R"((.+?)\{)");
+
+  int game_count = 0;
+  int fen_count = 0;
+
+  for (const auto& filename : filenames) {
+    std::cout << now_string() << " convert " << filename << std::endl;
+    ifstream ifs;
+    ifs.open(filename);
+    if (!ifs) {
+      std::cout << "Error! : cannot open " << filename << std::endl;
+      continue;
+    }
+
+    int game_result = 0;
+
+    std::string line;
+    while (std::getline(ifs, line)) {
+
+      if (line.empty()) {
+        continue;
+      }
+
+      // Tag line: only the Result tag is used, and it also counts the game.
+      if (line.front() == '[') {
+        std::smatch match;
+
+        // example: [Result "1-0"]
+        if (std::regex_search(line, match, pattern_result)) {
+          game_result = parse_game_result_from_pgn_extract(match.str(1));
+
+          game_count++;
+          if (game_count % 10000 == 0) {
+            std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl;
+          }
+        }
+
+        continue;
+      }
+
+      // Move text line.
+      int gamePly = 0;
+
+      PackedSfenValue psv;
+      memset((char*)&psv, 0, sizeof(PackedSfenValue));
+
+      auto itr = line.cbegin();
+
+      while (true) {
+        // NOTE(review): gamePly advances for every comment examined, so a
+        // comment without [%eval] (which restarts the loop below) makes it
+        // run ahead of the real ply. Confirm whether such input can occur.
+        gamePly++;
+
+        std::smatch match;
+
+        // example: { [%eval 0.25] [%clk 0:10:00] }
+        if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) {
+          break;
+        }
+
+        itr += match.position(0) + match.length(0);
+        std::string str_eval_clk = match.str(1);
+        trim(str_eval_clk);
+
+        // example: [%eval 0.25]
+        // example: [%eval #-4]
+        // example: [%eval #3]
+        if (!std::regex_search(str_eval_clk, match, pattern_eval)) {
+          continue;
+        }
+
+        std::string str_eval = match.str(1);
+        trim(str_eval);
+        psv.score = parse_score_from_pgn_extract(str_eval);
+
+        // example: { rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR b KQkq d3 0 1 }
+        if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) {
+          break;
+        }
+
+        itr += match.position(0) + match.length(0);
+        std::string str_fen = match.str(1);
+        trim(str_fen);
+
+        StateInfo si;
+        pos.set(str_fen, false, &si, th);
+        pos.sfen_pack(psv.sfen);
+
+        // example: d7d5 {
+        if (!std::regex_search(itr, line.cend(), match, pattern_move)) {
+          break;
+        }
+
+        // The "- 1" leaves itr on the '{' that ended the match, so the next
+        // pass sees that comment as its eval comment.
+        itr += match.position(0) + match.length(0) - 1;
+        std::string str_move = match.str(1);
+        trim(str_move);
+        psv.move = UCI::to_move(pos, str_move);
+
+        psv.gamePly = gamePly;
+        psv.game_result = game_result;
+
+        // game_result is White-relative (1 = White win); store it, and the
+        // score with it, from the side to move's point of view.
+        if (pos.side_to_move() == BLACK) {
+          psv.score *= -1;
+          psv.game_result *= -1;
+        }
+
+        ofs.write((char*)&psv, sizeof(PackedSfenValue));
+        memset((char*)&psv, 0, sizeof(PackedSfenValue));
+
+        fen_count++;
+      }
+
+      game_result = 0;
+    }
+  }
+
+  std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl;
+  std::cout << now_string() << " all done" << std::endl;
+  ofs.close();
+}
+
 //void convert_plain(const vector<string>& filenames , const string& output_file_name)
 //{
 //	Position tpos;
@@ -2581,6 +2788,8 @@ void learn(Position&, istringstream& is)
 	int ply_minimum = 0;
 	int ply_maximum = 114514;
 	bool interpolate_eval = 0;
+	// Convert teacher data in pgn-extract format to the YaneuraOu bin format
+	bool use_convert_bin_from_pgn_extract = false;
 
 	// それらのときに書き出すファイル名(デフォルトでは"shuffled_sfen.bin")
 	string output_file_name = "shuffled_sfen.bin";
@@ -2715,6 +2924,7 @@ void learn(Position&, istringstream& is)
 		else if (option == "convert_plain") use_convert_plain = true;
 		else if (option == "convert_bin") use_convert_bin = true;
 		else if (option == "interpolate_eval") is >> interpolate_eval;
+		else if (option == "convert_bin_from_pgn-extract") use_convert_bin_from_pgn_extract = true;
 		// さもなくば、それはファイル名である。
 		else
 			filenames.push_back(option);
@@ -2828,6 +3038,13 @@ void learn(Position&, istringstream& is)
 		return;
 	}
 
+	if (use_convert_bin_from_pgn_extract)
+	{
+		is_ready(true);
+		cout << "convert_bin_from_pgn-extract.." << endl;
+		convert_bin_from_pgn_extract(filenames, output_file_name);
+		return;
+	}
 
 	cout << "loop : " << loop << endl;
 	cout << "eval_limit : " << eval_limit << endl;