mirror of
https://github.com/sockspls/badfish
synced 2025-04-30 00:33:09 +00:00
add pgn_to_plain
This commit is contained in:
parent
0761d9504e
commit
4c926b8eb4
2 changed files with 120 additions and 0 deletions
52
script/README.md
Normal file
52
script/README.md
Normal file
|
@ -0,0 +1,52 @@
|
|||
# `pgn_to_plain`
|
||||
This script converts PGN files into a plain-text file that can be fed to the `learn convert_bin` command. You need to install [python-chess](https://pypi.org/project/python-chess/) to use this script.
|
||||
|
||||
|
||||
pip install python-chess
|
||||
|
||||
|
||||
# Example of Qhapaq's finetune using `pgn_to_plain`
|
||||
|
||||
## Download data
|
||||
You can download data from [here](http://rebel13.nl/index.html)
|
||||
|
||||
## Convert pgn files
|
||||
|
||||
**Important: the converted text file will be very large (approx. 200 bytes per position).**
|
||||
|
||||
python pgn_to_plain.py --pgn "pgn/*.pgn" --start_ply 1 --output converted_pgn.txt
|
||||
|
||||
|
||||
The `--pgn` option supports wildcards. If you use the PGN files with Elo >= 3300, you will get a text file of about 1.7 GB.
|
||||
|
||||
|
||||
## Convert into training data
|
||||
|
||||
|
||||
### Example build command
|
||||
|
||||
make nnue-learn ARCH=x86-64
|
||||
|
||||
See `src/Makefile` for details.
|
||||
|
||||
|
||||
### Convert
|
||||
|
||||
./stockfish
|
||||
learn convert_bin converted_pgn.txt output_file_name pgn_bin.bin
|
||||
learn shuffle pgn_bin.bin
|
||||
|
||||
You also need to prepare validation data for training, as follows.
|
||||
|
||||
python pgn_to_plain.py --pgn "pgn/ccrl-40-15-3400.pgn" --start_ply 1 --output ccrl-40-15-3400.txt
|
||||
./stockfish
|
||||
learn convert_bin ccrl-40-15-3400.txt output_file_name ccrl-40-15-3400_plain.bin
|
||||
|
||||
|
||||
### Learn
|
||||
|
||||
./stockfish
|
||||
setoption name Threads value 8
|
||||
learn shuffled_sfen.bin newbob_decay 0.5 validation_set_file_name ccrl-40-15-3400_plain.bin nn_batch_size 50000 batchsize 1000000 eval_save_interval 8000000 eta 0.05 lambda 0.0 eval_limit 3000 mirror_percentage 0 use_draw_in_training 1
|
||||
|
||||
|
68
script/pgn_to_plain.py
Normal file
68
script/pgn_to_plain.py
Normal file
|
@ -0,0 +1,68 @@
|
|||
import chess.pgn
|
||||
import argparse
|
||||
import glob
|
||||
from typing import List
|
||||
|
||||
# todo close in c++ tools using pgn-extract
|
||||
# https://www.cs.kent.ac.uk/people/staff/djb/pgn-extract/help.html#-w
|
||||
|
||||
def parse_result(result_str: str, board: "chess.Board") -> int:
    """Return the game result from the point of view of the side to move.

    Args:
        result_str: Value of the PGN ``Result`` tag; one of ``"1-0"``,
            ``"0-1"`` or ``"1/2-1/2"``.
        board: Position whose ``turn`` attribute tells which side is to move.

    Returns:
        ``1`` if the side to move won the game, ``-1`` if it lost, and ``0``
        if the game was drawn.

    Raises:
        ValueError: If ``result_str`` is not one of the three results above.
    """
    if result_str == "1/2-1/2":
        return 0

    # Score the game from White's side first, then flip it for Black.
    if result_str == "1-0":
        white_score = 1
    elif result_str == "0-1":
        white_score = -1
    else:
        raise ValueError(f"illegal result: {result_str!r}")

    # python-chess represents colors as booleans (chess.WHITE is True), so a
    # truthy ``board.turn`` means White is to move.
    return white_score if board.turn else -white_score
|
||||
|
||||
def game_sanity_check(game: chess.pgn.Game) -> bool:
    """Tell whether the game's ``Result`` header holds a usable result.

    A usable result is ``"1/2-1/2"``, ``"0-1"`` or ``"1-0"``. For any other
    value a diagnostic is printed and ``False`` is returned.
    """
    outcome = game.headers["Result"]
    if outcome in ("1/2-1/2", "0-1", "1-0"):
        return True
    print("invalid result", outcome)
    return False
|
||||
|
||||
def parse_game(game: chess.pgn.Game, writer, start_play: int = 1) -> None:
    """Write one plain-text training record per mainline move of ``game``.

    Each record consists of the lines ``fen``, ``move``, ``score`` (always 0),
    ``ply`` and ``result`` followed by a terminating ``e`` line. Moves whose
    ply index (counted from 0) is below ``start_play`` are not written but are
    still played on the board. Games rejected by ``game_sanity_check``
    produce no output.

    Args:
        game: Parsed PGN game.
        writer: Object with a ``write(str)`` method that receives the records.
        start_play: First ply index to export.
    """
    position: chess.Board = game.board()
    if not game_sanity_check(game):
        return

    outcome: str = game.headers["Result"]
    for ply, move in enumerate(game.mainline_moves()):
        if ply >= start_play:
            # The record describes the position *before* the move is played.
            record = (
                f"fen {position.fen()}\n",
                f"move {move}\n",
                "score 0\n",
                f"ply {ply}\n",
                f"result {parse_result(outcome, position)}\n",
                "e\n",
            )
            for line in record:
                writer.write(line)

        position.push(move)
|
||||
|
||||
def main():
    """Convert every PGN file matching ``--pgn`` into one plain-text file.

    Command-line options:
        --pgn: Glob pattern selecting the input PGN files (required).
        --start_ply: Value passed to ``parse_game`` as ``start_play``
            (default 1).
        --output: Path of the text file to write (default ``plain.txt``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pgn", type=str, required=True)
    parser.add_argument("--start_ply", type=int, default=1)
    parser.add_argument("--output", type=str, default="plain.txt")
    args = parser.parse_args()

    pgn_files: List[str] = glob.glob(args.pgn)

    # Context managers make sure the output file and every PGN file are
    # closed, even if parsing raises part-way through.
    with open(args.output, 'w') as f:
        for pgn_file in pgn_files:
            print("parse", pgn_file)
            with open(pgn_file) as pgn_loader:
                while True:
                    game = chess.pgn.read_game(pgn_loader)
                    # read_game returns None at end of file.
                    if game is None:
                        break
                    parse_game(game, f, args.start_ply)
|
||||
|
||||
# Script entry point: run the conversion only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
Loading…
Add table
Reference in a new issue