1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-30 16:53:09 +00:00

add pgn_to_plain

This commit is contained in:
rqs 2020-06-27 13:08:12 +09:00 committed by nodchip
parent 0761d9504e
commit 4c926b8eb4
2 changed files with 120 additions and 0 deletions

52
script/README.md Normal file
View file

@ -0,0 +1,52 @@
# `pgn_to_plain`
This script converts PGN files into a text file that can be fed to the `learn convert_bin` command. You need to install [python-chess](https://pypi.org/project/python-chess/) to use this script.
pip install python-chess
# Example of Qhapaq's finetune using `pgn_to_plain`
## Download data
You can download data from [here](http://rebel13.nl/index.html)
## Convert pgn files
**Important: the converted text file will be very large (approx. 200 bytes per position).**
python pgn_to_plain.py --pgn "pgn/*.pgn" --start_ply 1 --output converted_pgn.txt
The `--pgn` option supports wildcards. When you use the PGN files with Elo >= 3300, you will get a text file of about 1.7 GB.
## Convert into training data
### Example build command
make nnue-learn ARCH=x86-64
See `src/Makefile` for details.
### Convert
./stockfish
learn convert_bin converted_pgn.txt output_file_name pgn_bin.bin
learn shuffle pgn_bin.bin
You also need to prepare validation data for training, as follows.
python pgn_to_plain.py --pgn "pgn/ccrl-40-15-3400.pgn" --start_ply 1 --output ccrl-40-15-3400.txt
./stockfish
learn convert_bin ccrl-40-15-3400.txt ccrl-40-15-3400_plain.bin
### Learn
./stockfish
setoption name Threads value 8
learn shuffled_sfen.bin newbob_decay 0.5 validation_set_file_name ccrl-40-15-3400_plain.bin nn_batch_size 50000 batchsize 1000000 eval_save_interval 8000000 eta 0.05 lambda 0.0 eval_limit 3000 mirror_percentage 0 use_draw_in_training 1

68
script/pgn_to_plain.py Normal file
View file

@ -0,0 +1,68 @@
import chess.pgn
import argparse
import glob
from typing import List
# todo close in c++ tools using pgn-extract
# https://www.cs.kent.ac.uk/people/staff/djb/pgn-extract/help.html#-w
def parse_result(result_str: str, board: chess.Board) -> int:
    """Return the game result from the point of view of the side to move.

    Args:
        result_str: PGN result tag value; one of "1/2-1/2", "1-0" or "0-1".
        board: Position whose ``turn`` decides the point of view.

    Returns:
        1 if the side to move won the game, -1 if it lost, 0 for a draw.

    Raises:
        ValueError: If ``result_str`` is not one of the three known results.
    """
    if result_str == "1/2-1/2":
        return 0
    # Express the result from White's side first, then flip it for Black.
    if result_str == "1-0":
        white_score = 1
    elif result_str == "0-1":
        white_score = -1
    else:
        print("illeagal result", result_str)
        raise ValueError(result_str)
    # A White win seen by Black is a loss (-1), not a draw (0).
    if board.turn == chess.WHITE:
        return white_score
    return -white_score
def game_sanity_check(game: chess.pgn.Game) -> bool:
    """Tell whether the game's "Result" header is a decided result.

    Prints a diagnostic and returns False when the header is anything other
    than "1/2-1/2", "0-1" or "1-0"; returns True otherwise.
    """
    recorded = game.headers["Result"]
    if recorded in ("1/2-1/2", "0-1", "1-0"):
        return True
    print("invalid result", recorded)
    return False
def parse_game(game: chess.pgn.Game, writer, start_play: int=1)->None:
    """Write one plain-text record per mainline position of ``game``.

    Games that fail ``game_sanity_check`` produce no output. Positions whose
    zero-based ply index is below ``start_play`` are skipped, but their moves
    are still played on the board so later positions are correct.

    Each record holds the FEN before the move, the move itself, a constant
    score of 0, the ply index, the result for the side to move, and a
    terminating "e" line.
    """
    position: chess.Board = game.board()
    if not game_sanity_check(game):
        return
    outcome: str = game.headers["Result"]
    for half_move, played in enumerate(game.mainline_moves()):
        if half_move >= start_play:
            writer.write(f"fen {position.fen()}\n")
            writer.write(f"move {played}\n")
            writer.write("score 0\n")
            writer.write(f"ply {half_move}\n")
            writer.write(f"result {parse_result(outcome, position)}\n")
            writer.write("e\n")
        # Advance the board even for skipped plies.
        position.push(played)
def main():
    """Convert every PGN file matching ``--pgn`` into one plain-text file.

    Command-line options:
        --pgn: Glob pattern selecting the input PGN files (required).
        --start_ply: First zero-based ply index to emit (default 1).
        --output: Path of the text file to write (default "plain.txt").
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pgn", type=str, required=True)
    parser.add_argument("--start_ply", type=int, default=1)
    parser.add_argument("--output", type=str, default="plain.txt")
    args = parser.parse_args()
    pgn_files: List[str] = glob.glob(args.pgn)
    if not pgn_files:
        # The output file is still created (empty), as before; just say why.
        print("no pgn files matched", args.pgn)
    # Context managers close both the output and every input file, even when
    # parsing raises part-way through.
    with open(args.output, 'w') as f:
        for pgn_file in pgn_files:
            print("parse", pgn_file)
            with open(pgn_file) as pgn_loader:
                while True:
                    game = chess.pgn.read_game(pgn_loader)
                    if game is None:
                        # read_game returned None: nothing more to read.
                        break
                    parse_game(game, f, args.start_ply)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()