From 4c926b8eb4ccdd129bdf83b354cb092d2206b4a0 Mon Sep 17 00:00:00 2001 From: rqs Date: Sat, 27 Jun 2020 13:08:12 +0900 Subject: [PATCH] add pgn_to_plain --- script/README.md | 52 ++++++++++++++++++++++++++++++++ script/pgn_to_plain.py | 68 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 script/README.md create mode 100644 script/pgn_to_plain.py diff --git a/script/README.md b/script/README.md new file mode 100644 index 00000000..feb57ca2 --- /dev/null +++ b/script/README.md @@ -0,0 +1,52 @@ +# `pgn_to_plain` +This script converts PGN files into a plain-text file that can be fed to the `learn convert_bin` command. You need to install [python-chess](https://pypi.org/project/python-chess/) to use this script. + + + pip install python-chess + + +# Example of Qhapaq's finetune using `pgn_to_plain` + +## Download data +You can download data from [here](http://rebel13.nl/index.html) + +## Convert pgn files + +**Important: the converted text will be very large (approx. 200 bytes per position)** + + python pgn_to_plain.py --pgn "pgn/*.pgn" --start_ply 1 --output converted_pgn.txt + + +The `--pgn` option supports wildcards. If you use the pgn files with Elo >= 3300, you will get a text file of about 1.7 GB. + + +## Convert into training data + + +### Example build command + + make nnue-learn ARCH=x86-64 + +See `src/Makefile` for details. + + +### Convert + + ./stockfish + learn convert_bin converted_pgn.txt output_file_name pgn_bin.bin + learn shuffle pgn_bin.bin + +You also need to prepare validation data for training, as follows. 
"""Convert PGN games into the plain-text format read by `learn convert_bin`.

Every selected position of every game is written as one record::

    fen <FEN of the position before the move>
    move <the move played from that position>
    score 0
    ply <0-based index of the move within the game>
    result <1 / 0 / -1 from the side to move's point of view>
    e

Requires the third-party ``python-chess`` package (``pip install python-chess``).

Example::

    python pgn_to_plain.py --pgn "pgn/*.pgn" --start_ply 1 --output converted_pgn.txt
"""
import chess.pgn
import argparse
import glob
from typing import List, TextIO

# todo close in c++ tools using pgn-extract
# https://www.cs.kent.ac.uk/people/staff/djb/pgn-extract/help.html#-w

# Game outcome seen from White's side, keyed by the PGN "Result" tag value.
# Single source of truth for both validation and scoring.
_WHITE_POV_RESULT = {"1-0": 1, "0-1": -1, "1/2-1/2": 0}


def parse_result(result_str: str, board: chess.Board) -> int:
    """Return the game outcome from the point of view of the side to move.

    Args:
        result_str: PGN result tag, one of "1-0", "0-1" or "1/2-1/2".
        board: Position whose ``turn`` decides the point of view.

    Returns:
        1 if the side to move won the game, -1 if it lost, 0 for a draw.

    Raises:
        ValueError: If ``result_str`` is not one of the three decided results.
    """
    try:
        white_pov = _WHITE_POV_RESULT[result_str]
    except KeyError:
        raise ValueError(f"illegal result: {result_str!r}") from None
    # Flip the sign for Black so a White win is a loss (-1) for Black to move
    # and vice versa; a draw (0) is unaffected by the flip.
    return white_pov if board.turn == chess.WHITE else -white_pov


def game_sanity_check(game: chess.pgn.Game) -> bool:
    """Return True if the game has a decided result that can be scored.

    Games whose "Result" header is anything other than "1-0", "0-1" or
    "1/2-1/2" (for example unfinished games) are reported and rejected.
    """
    result = game.headers.get("Result")
    if result not in _WHITE_POV_RESULT:
        print("invalid result", result)
        return False
    return True


def parse_game(game: chess.pgn.Game, writer: TextIO, start_play: int = 1) -> None:
    """Write one record per mainline position of ``game`` to ``writer``.

    Args:
        game: Parsed PGN game.
        writer: Text stream opened for writing.
        start_play: First 0-based ply to emit; earlier positions are skipped
            (with the default of 1 the initial position is not written).
    """
    if not game_sanity_check(game):
        return
    result: str = game.headers["Result"]
    board: chess.Board = game.board()
    for ply, move in enumerate(game.mainline_moves()):
        if ply >= start_play:
            # The record describes the position *before* the move is played,
            # so it must be written before board.push().
            writer.write(
                f"fen {board.fen()}\n"
                f"move {move}\n"
                "score 0\n"
                f"ply {ply}\n"
                f"result {parse_result(result, board)}\n"
                "e\n"
            )
        board.push(move)


def main() -> None:
    """Parse the command line and convert every matching PGN file."""
    parser = argparse.ArgumentParser(
        description="Convert PGN files into plain text for `learn convert_bin`."
    )
    parser.add_argument("--pgn", type=str, required=True,
                        help="PGN file to convert; glob wildcards are supported")
    parser.add_argument("--start_ply", type=int, default=1,
                        help="first 0-based ply to write for each game")
    parser.add_argument("--output", type=str, default="plain.txt",
                        help="path of the text file to write")
    args = parser.parse_args()

    # glob returns matches in arbitrary order; sort for reproducible output.
    pgn_files: List[str] = sorted(glob.glob(args.pgn))
    if not pgn_files:
        print("no pgn file matches", args.pgn)

    # Context managers close the output and every input even if parsing fails.
    with open(args.output, "w") as out:
        for pgn_file in pgn_files:
            print("parse", pgn_file)
            with open(pgn_file) as pgn_loader:
                while True:
                    game = chess.pgn.read_game(pgn_loader)
                    if game is None:  # end of file
                        break
                    parse_game(game, out, args.start_ply)


if __name__ == "__main__":
    main()