add pgn_to_plain

2025-07-13 04:29:15 +00:00 · 2020-06-27 13:08:12 +09:00 · 2020-06-27 13:08:12 +09:00 · 4c926b8eb4
commit 4c926b8eb4
parent 0761d9504e
2 changed files with 120 additions and 0 deletions
--- a/script/README.md
+++ b/script/README.md
@ -0,0 +1,52 @@
+# `pgn_to_plain`
+This script converts PGN files into a plain-text file that can be passed to the `learn convert_bin` command. You need to install [python-chess](https://pypi.org/project/python-chess/) to use this script.
+
+
+    pip install python-chess
+	
+
+# Example of Qhapaq's fine-tuning using `pgn_to_plain`
+
+## Download data
+You can download the data from [here](http://rebel13.nl/index.html).
+
+## Convert pgn files
+
+**Important: the converted text file will be very large (approx. 200 bytes per position).**
+
+    python pgn_to_plain.py --pgn "pgn/*.pgn" --start_ply 1 --output converted_pgn.txt
+
+
+The `--pgn` option supports wildcards. When you use the pgn files with Elo >= 3300, you will get a 1.7 GB text file.
+	
+	
+## Convert into training data
+
+
+### Example build command
+
+    make nnue-learn ARCH=x86-64
+
+See `src/Makefile` for details.
+
+
+### Convert
+
+    ./stockfish
+    learn convert_bin converted_pgn.txt output_file_name pgn_bin.bin
+	learn shuffle pgn_bin.bin
+	
+You also need to prepare validation data for training, as follows.
+	
+	python pgn_to_plain.py --pgn "pgn/ccrl-40-15-3400.pgn" --start_ply 1 --output ccrl-40-15-3400.txt
+	./stockfish
+    learn convert_bin ccrl-40-15-3400.txt output_file_name ccrl-40-15-3400_plain.bin
+	
+	
+### Learn
+
+    ./stockfish
+	setoption name Threads value 8
+    learn shuffled_sfen.bin newbob_decay 0.5  validation_set_file_name ccrl-40-15-3400_plain.bin  nn_batch_size 50000 batchsize 1000000 eval_save_interval 8000000 eta 0.05 lambda 0.0 eval_limit 3000 mirror_percentage 0 use_draw_in_training 1
+
+
--- a/script/pgn_to_plain.py
+++ b/script/pgn_to_plain.py
@ -0,0 +1,68 @@
+import chess.pgn
+import argparse
+import glob
+from typing import List
+
+# todo close in c++ tools using pgn-extract
+# https://www.cs.kent.ac.uk/people/staff/djb/pgn-extract/help.html#-w
+
+def parse_result(result_str:str, board:chess.Board) -> int:
+    if result_str == "1/2-1/2":
+        return 0
+    if result_str == "0-1":
+        if board.turn == chess.WHITE:
+            return -1
+        else:
+            return 1
+    elif result_str == "1-0":
+        if board.turn == chess.WHITE:
+            return 1
+        else:
+            return 0
+    else:
+        print("illeagal result", result_str)
+        raise ValueError
+
+def game_sanity_check(game: chess.pgn.Game) -> bool:
+    if not game.headers["Result"] in ["1/2-1/2", "0-1", "1-0"]:
+        print("invalid result", game.headers["Result"])
+        return False
+    return True
+    
+def parse_game(game: chess.pgn.Game, writer, start_play: int=1)->None:
+    board: chess.Board = game.board()
+    if not game_sanity_check(game):
+        return
+    result: str = game.headers["Result"]
+    for ply, move in enumerate(game.mainline_moves()):
+        if ply >= start_play:
+            writer.write("fen " + board.fen() + "\n")
+            writer.write("move " + str(move) + "\n")
+            writer.write("score 0\n")
+            writer.write("ply " + str(ply)+"\n")
+            writer.write("result " + str(parse_result(result, board)) +"\n")
+            writer.write("e\n")
+
+        board.push(move)
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--pgn", type=str, required=True)
+    parser.add_argument("--start_ply", type=int, default=1)
+    parser.add_argument("--output", type=str, default="plain.txt")
+    args = parser.parse_args()
+
+    pgn_files: List[str] = glob.glob(args.pgn)
+    f = open(args.output, 'w')
+    for pgn_file in pgn_files:
+        print("parse", pgn_file)
+        pgn_loader = open(pgn_file)
+        while True:
+            game = chess.pgn.read_game(pgn_loader)
+            if game is None:
+                break
+            parse_game(game, f, args.start_ply)
+    f.close()
+    
+# Run the conversion only when this file is executed as a script.
+if __name__=="__main__":
+    main()