diff --git a/AUTHORS b/AUTHORS index f08d71d3..9e9d6c35 100644 --- a/AUTHORS +++ b/AUTHORS @@ -5,6 +5,10 @@ Marco Costalba (mcostalba) Joona Kiiski (zamar) Gary Linscott (glinscott) +Yu Nasu (ynasu87) # The original inventor of NNUE +Motohiro Isozaki (yaneurao) # The author of the training data generator and the trainer +Hisayori Noda (nodchip) # Ported NNUE to Stockfish. + Aditya (absimaldata) Adrian Petrescu (apetresc) Ajith Chandy Jose (ajithcj) diff --git a/script/README.md b/script/README.md deleted file mode 100644 index feb57ca2..00000000 --- a/script/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# `pgn_to_plain` -This script converts pgn files into text file to apply `learn convert_bin` command. You need to import [python-chess](https://pypi.org/project/python-chess/) to use this script. - - - pip install python-chess - - -# Example of Qhapaq's finetune using `pgn_to_plain` - -## Download data -You can download data from [here](http://rebel13.nl/index.html) - -## Convert pgn files - -**Important : convert text will be superheavy (approx 200 byte / position)** - - python pgn_to_plain.py --pgn "pgn/*.pgn" --start_ply 1 --output converted_pgn.txt - - -`--pgn` option supports wildcard. When you use pgn files with elo >= 3300, You will get 1.7 GB text file. - - -## Convert into training data - - -### Example build command - - make nnue-learn ARCH=x86-64 - -See `src/Makefile` for detail. - - -### Convert - - ./stockfish - learn convert_bin converted_pgn.txt output_file_name pgn_bin.bin - learn shuffle pgn_bin.bin - -You also need to prepare validation data for training like following. 
- - python pgn_to_plain.py --pgn "pgn/ccrl-40-15-3400.pgn" --start_ply 1 --output ccrl-40-15-3400.txt - ./stockfish - learn convert_bin ccrl-40-15-3400.txt ccrl-40-15-3400_plain.bin - - -### Learn - - ./stockfish - setoption name Threads value 8 - learn shuffled_sfen.bin newbob_decay 0.5 validation_set_file_name ccrl-40-15-3400_plain.bin nn_batch_size 50000 batchsize 1000000 eval_save_interval 8000000 eta 0.05 lambda 0.0 eval_limit 3000 mirror_percentage 0 use_draw_in_training 1 - - diff --git a/script/pgn_to_plain.py b/script/pgn_to_plain.py deleted file mode 100644 index 5f9300cb..00000000 --- a/script/pgn_to_plain.py +++ /dev/null @@ -1,68 +0,0 @@ -import chess.pgn -import argparse -import glob -from typing import List - -# todo close in c++ tools using pgn-extract -# https://www.cs.kent.ac.uk/people/staff/djb/pgn-extract/help.html#-w - -def parse_result(result_str:str, board:chess.Board) -> int: - if result_str == "1/2-1/2": - return 0 - if result_str == "0-1": - if board.turn == chess.WHITE: - return -1 - else: - return 1 - elif result_str == "1-0": - if board.turn == chess.WHITE: - return 1 - else: - return -1 - else: - print("illeagal result", result_str) - raise ValueError - -def game_sanity_check(game: chess.pgn.Game) -> bool: - if not game.headers["Result"] in ["1/2-1/2", "0-1", "1-0"]: - print("invalid result", game.headers["Result"]) - return False - return True - -def parse_game(game: chess.pgn.Game, writer, start_play: int=1)->None: - board: chess.Board = game.board() - if not game_sanity_check(game): - return - result: str = game.headers["Result"] - for ply, move in enumerate(game.mainline_moves()): - if ply >= start_play: - writer.write("fen " + board.fen() + "\n") - writer.write("move " + str(move) + "\n") - writer.write("score 0\n") - writer.write("ply " + str(ply)+"\n") - writer.write("result " + str(parse_result(result, board)) +"\n") - writer.write("e\n") - - board.push(move) - -def main(): - parser = argparse.ArgumentParser() - 
parser.add_argument("--pgn", type=str, required=True) - parser.add_argument("--start_ply", type=int, default=1) - parser.add_argument("--output", type=str, default="plain.txt") - args = parser.parse_args() - - pgn_files: List[str] = glob.glob(args.pgn) - f = open(args.output, 'w') - for pgn_file in pgn_files: - print("parse", pgn_file) - pgn_loader = open(pgn_file) - while True: - game = chess.pgn.read_game(pgn_loader) - if game is None: - break - parse_game(game, f, args.start_ply) - f.close() - -if __name__=="__main__": - main() diff --git a/src/Makefile b/src/Makefile index 585d93a4..19bee102 100644 --- a/src/Makefile +++ b/src/Makefile @@ -39,21 +39,7 @@ PGOBENCH = ./$(EXE) bench SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ - eval/evaluate_mir_inv_tools.cpp \ - eval/nnue/evaluate_nnue.cpp \ - eval/nnue/evaluate_nnue_learner.cpp \ - eval/nnue/features/half_kp.cpp \ - eval/nnue/features/half_relative_kp.cpp \ - eval/nnue/features/k.cpp \ - eval/nnue/features/p.cpp \ - eval/nnue/features/castling_right.cpp \ - eval/nnue/features/enpassant.cpp \ - eval/nnue/nnue_test_command.cpp \ - extra/sfen_packer.cpp \ - learn/gensfen2019.cpp \ - learn/learner.cpp \ - learn/learning_tools.cpp \ - learn/multi_think.cpp + nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp OBJS = $(SRCS:.cpp=.o) @@ -211,7 +197,7 @@ endif ### ========================================================================== ### 3.1 Selecting compiler (default = gcc) -CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) $(NNUECXXFLAGS) +CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) DEPENDFLAGS += -std=c++17 LDFLAGS += $(EXTRALDFLAGS) @@ -564,12 +550,12 @@ clean: objclean profileclean # clean binaries and objects objclean: - @rm -f $(EXE) *.o ./syzygy/*.o 
./learn/*.o ./extra/*.o ./eval/*.o ./eval/nnue/*.o ./eval/nnue/features/*.o + @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o # clean auxiliary profiling files profileclean: @rm -rf profdir - @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda ./eval/nnue/*.gcda ./eval/nnue/features/*.gcda + @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda @rm -f stockfish.profdata *.profraw default: @@ -663,19 +649,6 @@ icc-profile-use: EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \ all -nnue: config-sanity - $(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_NNUE -DENABLE_TEST_CMD -fopenmp' LDFLAGS='$(LDFLAGS) -fopenmp' build - -profile-nnue: export NNUECXXFLAGS = -DEVAL_NNUE -DENABLE_TEST_CMD -profile-nnue: config-sanity - $(MAKE) profile-build - -nnue-gen-sfen-from-original-eval: config-sanity - $(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DUSE_EVAL_HASH -DENABLE_TEST_CMD -fopenmp' LDFLAGS='$(LDFLAGS) -fopenmp' build - -nnue-learn: config-sanity - $(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DEVAL_NNUE -DUSE_EVAL_HASH -DENABLE_TEST_CMD -DUSE_BLAS -I/mingw64/include/OpenBLAS -fopenmp' LDFLAGS='$(LDFLAGS) -lopenblas -fopenmp' build - .depend: -@$(CXX) $(DEPENDFLAGS) -MM $(OBJS:.o=.cpp) > $@ 2> /dev/null diff --git a/src/endgame.cpp b/src/endgame.cpp index 40f49dce..a8ceb648 100644 --- a/src/endgame.cpp +++ b/src/endgame.cpp @@ -589,8 +589,8 @@ ScaleFactor Endgame::operator()(const Position& pos) const { Bitboard strongPawns = pos.pieces(strongSide, PAWN); // If all pawns are ahead of the king on a single rook file, it's a draw. 
- if (!((strongPawns & ~FileABB) || (strongPawns & ~FileHBB)) && - !(strongPawns & ~passed_pawn_span(weakSide, weakKing))) + if ( !(strongPawns & ~(FileABB | FileHBB)) + && !(strongPawns & ~passed_pawn_span(weakSide, weakKing))) return SCALE_FACTOR_DRAW; return SCALE_FACTOR_NONE; diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h deleted file mode 100644 index b043f2e1..00000000 --- a/src/eval/evaluate_common.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef _EVALUATE_COMMON_H_ -#define _EVALUATE_COMMON_H_ - -// A common header-like function for modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT). - -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) -#include - -// KK file name -#define KK_BIN "KK_synthesized.bin" - -// KKP file name -#define KKP_BIN "KKP_synthesized.bin" - -// KPP file name -#define KPP_BIN "KPP_synthesized.bin" - -namespace Eval -{ - -#if defined(USE_EVAL_HASH) - // prefetch function - void prefetch_evalhash(const Key key); -#endif - - // An operator that applies the function f to each parameter of the evaluation function. - // Used for parameter analysis etc. - // type indicates the survey target. - // type = -1 :KK,KKP,KPP all - // type = 0: KK only - // type = 1: KKP only - // type = 2: KPP only - void foreach_eval_param(std::functionf, int type = -1); - - // -------------------------- - // for learning - // -------------------------- - -#if defined(EVAL_LEARN) - // Initialize the gradient array during learning - // Pass the learning rate as an argument. If 0.0, the default value is used. - // The epoch of update_weights() gradually changes from eta to eta2 until eta_epoch. - // After eta2_epoch, gradually change from eta2 to eta3. - void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3); - - // Add the gradient difference value to the gradient array for all features that appear in the current phase. 
- // freeze[0]: Flag that kk does not learn - // freeze[1]: Flag that kkp does not learn - // freeze[2]: Flag that kpp does not learn - // freeze[3]: Flag that kppp does not learn - void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array& freeze); - - // Do SGD or AdaGrad or something based on the current gradient. - // epoch: Generation counter (starting from 0) - // freeze[0]: Flag that kk does not learn - // freeze[1]: Flag that kkp does not learn - // freeze[2]: Flag that kpp does not learn - // freeze[3]: Flag that kppp does not learn - void update_weights(uint64_t epoch, const std::array& freeze); - - // Save the evaluation function parameters to a file. - // You can specify the extension added to the end of the file. - void save_eval(std::string suffix); - - // Get the current eta. - double get_eta(); - - // --learning related commands - - // A function that normalizes KK. Note that it is not completely equivalent to the original evaluation function. - // By making the values ​​of kkp and kpp as close to zero as possible, the value of the feature factor (which is zero) that did not appear during learning - // The idea of ​​ensuring it is valid. - void regularize_kk(); - -#endif - - -} - -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) - -#endif // _EVALUATE_KPPT_COMMON_H_ diff --git a/src/eval/evaluate_mir_inv_tools.cpp b/src/eval/evaluate_mir_inv_tools.cpp deleted file mode 100644 index 3b5d3a36..00000000 --- a/src/eval/evaluate_mir_inv_tools.cpp +++ /dev/null @@ -1,190 +0,0 @@ -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) - -#include "evaluate_mir_inv_tools.h" - -namespace Eval -{ - - // --- tables - - // Value when a certain BonaPiece is seen from the other side - // BONA_PIECE_INIT is -1, so it must be a signed type. - // Even if KPPT is expanded, BonaPiece will not exceed 2^15 for the time being, so int16_t is good. 
- int16_t inv_piece_[Eval::fe_end]; - - // Returns the one at the position where a BonaPiece on the board is mirrored. - int16_t mir_piece_[Eval::fe_end]; - - - // --- methods - -// Returns the value when a certain BonaPiece is seen from the other side - Eval::BonaPiece inv_piece(Eval::BonaPiece p) { return (Eval::BonaPiece)inv_piece_[p]; } - - // Returns the one at the position where a BonaPiece on the board is mirrored. - Eval::BonaPiece mir_piece(Eval::BonaPiece p) { return (Eval::BonaPiece)mir_piece_[p]; } - - std::function mir_piece_init_function; - - void init_mir_inv_tables() - { - // Initialize the mirror and inverse tables. - - // Initialization is limited to once. - static bool first = true; - if (!first) return; - first = false; - - // exchange f and e - int t[] = { - f_pawn , e_pawn , - f_knight , e_knight , - f_bishop , e_bishop , - f_rook , e_rook , - f_queen , e_queen , - }; - - // Insert uninitialized value. - for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p) - { - inv_piece_[p] = BONA_PIECE_NOT_INIT; - - // mirror does not work for hand pieces. Just return the original value. - mir_piece_[p] = (p < f_pawn) ? p : BONA_PIECE_NOT_INIT; - } - - for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p) - { - for (int i = 0; i < 32 /* t.size() */; i += 2) - { - if (t[i] <= p && p < t[i + 1]) - { - Square sq = (Square)(p - t[i]); - - // found!! - BonaPiece q = (p < fe_hand_end) ? BonaPiece(sq + t[i + 1]) : (BonaPiece)(Inv(sq) + t[i + 1]); - inv_piece_[p] = q; - inv_piece_[q] = p; - - /* - It's a bit tricky, but regarding p - p >= fe_hand_end - When. - - For this p, let n be an integer (i in the above code can only be an even number), - a) When t[2n + 0] <= p inv->mir->inv must be the original location. - assert(p == inv_piece(mir_piece(inv_piece(mir_piece(p))))); - - // inv->mir->inv->mir must be the original location. 
- assert(p == mir_piece(inv_piece(mir_piece(inv_piece(p))))); - } - -#if 0 - // Pre-verification that it is okay to mirror the evaluation function - // When writing a value, there is an assertion, so if you can't mirror it, - // Should get caught in the assert. - - // Apery's WCSC26 evaluation function, kpp p1==0 or p1==20 (0th step on the back) - // There is dust in it, and if you don't avoid it, it will get caught in the assert. - - std::unordered_set s; - vector a = { - f_hand_pawn - 1,e_hand_pawn - 1, - f_hand_lance - 1, e_hand_lance - 1, - f_hand_knight - 1, e_hand_knight - 1, - f_hand_silver - 1, e_hand_silver - 1, - f_hand_gold - 1, e_hand_gold - 1, - f_hand_bishop - 1, e_hand_bishop - 1, - f_hand_rook - 1, e_hand_rook - 1, - }; - for (auto b : a) - s.insert((BonaPiece)b); - - // Excludes walks, incense, and katsura on the board that do not appear further (Apery also contains garbage here) - for (Rank r = RANK_1; r <= RANK_2; ++r) - for (File f = FILE_1; f <= FILE_9; ++f) - { - if (r == RANK_1) - { - // first step - BonaPiece b1 = BonaPiece(f_pawn + (f | r)); - s.insert(b1); - s.insert(inv_piece[b1]); - - // 1st stage incense - BonaPiece b2 = BonaPiece(f_lance + (f | r)); - s.insert(b2); - s.insert(inv_piece[b2]); - } - - // Katsura on the 1st and 2nd steps - BonaPiece b = BonaPiece(f_knight + (f | r)); - s.insert(b); - s.insert(inv_piece[b]); - } - - cout << "\nchecking kpp_write().."; - for (auto sq : SQ) - { - cout << sq << ' '; - for (BonaPiece p1 = BONA_PIECE_ZERO; p1 < fe_end; ++p1) - for (BonaPiece p2 = BONA_PIECE_ZERO; p2 < fe_end; ++p2) - if (!s.count(p1) && !s.count(p2)) - kpp_write(sq, p1, p2, kpp[sq][p1][p2]); - } - cout << "\nchecking kkp_write().."; - - for (auto sq1 : SQ) - { - cout << sq1 << ' '; - for (auto sq2 : SQ) - for (BonaPiece p1 = BONA_PIECE_ZERO; p1 < fe_end; ++p1) - if (!s.count(p1)) - kkp_write(sq1, sq2, p1, kkp[sq1][sq2][p1]); - } - cout << "..done!" 
<< endl; -#endif - } - -} - -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) diff --git a/src/eval/evaluate_mir_inv_tools.h b/src/eval/evaluate_mir_inv_tools.h deleted file mode 100644 index 826164bf..00000000 --- a/src/eval/evaluate_mir_inv_tools.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _EVALUATE_MIR_INV_TOOLS_ -#define _EVALUATE_MIR_INV_TOOLS_ - -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) - -// BonaPiece's mirror (horizontal flip) and inverse (180° on the board) tools to get pieces. - -#include "../types.h" -#include "../evaluate.h" -#include - -namespace Eval -{ - // ------------------------------------------------- - // tables - // ------------------------------------------------- - - // --- Provide Mirror and Inverse to BonaPiece. - - // These arrays are initialized by calling init() or init_mir_inv_tables();. - // If you want to use only this table from the evaluation function, - // Call init_mir_inv_tables(). - // These arrays are referenced from the KK/KKP/KPP classes below. - - // Returns the value when a certain BonaPiece is seen from the other side - extern Eval::BonaPiece inv_piece(Eval::BonaPiece p); - - // Returns the one at the position where a BonaPiece on the board is mirrored. - extern Eval::BonaPiece mir_piece(Eval::BonaPiece p); - - - // callback called when initializing mir_piece/inv_piece - // Used when extending fe_end on the user side. - // Inv_piece_ and inv_piece_ are exposed because they are necessary for this initialization. - // At the timing when mir_piece_init_function is called, until fe_old_end - // It is guaranteed that these tables have been initialized. - extern std::function mir_piece_init_function; - extern int16_t mir_piece_[Eval::fe_end]; - extern int16_t inv_piece_[Eval::fe_end]; - - // The table above will be initialized when you call this function explicitly or call init(). 
- extern void init_mir_inv_tables(); -} - -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) - -#endif diff --git a/src/eval/nnue/architectures/halfkp-cr-ep_256x2-32-32.h b/src/eval/nnue/architectures/halfkp-cr-ep_256x2-32-32.h deleted file mode 100644 index 37b155d5..00000000 --- a/src/eval/nnue/architectures/halfkp-cr-ep_256x2-32-32.h +++ /dev/null @@ -1,42 +0,0 @@ -// Definition of input features and network structure used in NNUE evaluation function - -#ifndef HALFKP_CR_EP_256X2_32_32_H -#define HALFKP_CR_EP_256X2_32_32_H - -#include "../features/feature_set.h" -#include "../features/half_kp.h" -#include "../features/castling_right.h" -#include "../features/enpassant.h" - -#include "../layers/input_slice.h" -#include "../layers/affine_transform.h" -#include "../layers/clipped_relu.h" - -namespace Eval { - - namespace NNUE { - - // Input features used in evaluation function - using RawFeatures = Features::FeatureSet< - Features::HalfKP, Features::CastlingRight, - Features::EnPassant>; - - // Number of input feature dimensions after conversion - constexpr IndexType kTransformedFeatureDimensions = 256; - - namespace Layers { - - // define network structure - using InputLayer = InputSlice; - using HiddenLayer1 = ClippedReLU>; - using HiddenLayer2 = ClippedReLU>; - using OutputLayer = AffineTransform; - - } // namespace Layers - - using Network = Layers::OutputLayer; - - } // namespace NNUE - -} // namespace Eval -#endif // HALFKP_CR_EP_256X2_32_32_H diff --git a/src/eval/nnue/architectures/k-p-cr-ep_256x2-32-32.h b/src/eval/nnue/architectures/k-p-cr-ep_256x2-32-32.h deleted file mode 100644 index e178b57b..00000000 --- a/src/eval/nnue/architectures/k-p-cr-ep_256x2-32-32.h +++ /dev/null @@ -1,42 +0,0 @@ -// Definition of input features and network structure used in NNUE evaluation function - -#ifndef K_P_CR_EP_256X2_32_32_H -#define K_P_CR_EP_256X2_32_32_H - -#include "../features/feature_set.h" -#include "../features/k.h" -#include "../features/p.h" -#include 
"../features/castling_right.h" -#include "../features/enpassant.h" - -#include "../layers/input_slice.h" -#include "../layers/affine_transform.h" -#include "../layers/clipped_relu.h" - -namespace Eval { - - namespace NNUE { - - // Input features used in evaluation function - using RawFeatures = Features::FeatureSet; - - // Number of input feature dimensions after conversion - constexpr IndexType kTransformedFeatureDimensions = 256; - - namespace Layers { - - // define network structure - using InputLayer = InputSlice; - using HiddenLayer1 = ClippedReLU>; - using HiddenLayer2 = ClippedReLU>; - using OutputLayer = AffineTransform; - - } // namespace Layers - - using Network = Layers::OutputLayer; - - } // namespace NNUE - -} // namespace Eval -#endif // K_P_CR_EP_256X2_32_32_H diff --git a/src/eval/nnue/architectures/k-p-cr_256x2-32-32.h b/src/eval/nnue/architectures/k-p-cr_256x2-32-32.h deleted file mode 100644 index d3c187c0..00000000 --- a/src/eval/nnue/architectures/k-p-cr_256x2-32-32.h +++ /dev/null @@ -1,41 +0,0 @@ -// Definition of input features and network structure used in NNUE evaluation function - -#ifndef K_P_CR_256X2_32_32_H -#define K_P_CR_256X2_32_32_H - -#include "../features/feature_set.h" -#include "../features/k.h" -#include "../features/p.h" -#include "../features/castling_right.h" - -#include "../layers/input_slice.h" -#include "../layers/affine_transform.h" -#include "../layers/clipped_relu.h" - -namespace Eval { - - namespace NNUE { - - // Input features used in evaluation function - using RawFeatures = Features::FeatureSet; - - // Number of input feature dimensions after conversion - constexpr IndexType kTransformedFeatureDimensions = 256; - - namespace Layers { - - // define network structure - using InputLayer = InputSlice; - using HiddenLayer1 = ClippedReLU>; - using HiddenLayer2 = ClippedReLU>; - using OutputLayer = AffineTransform; - - } // namespace Layers - - using Network = Layers::OutputLayer; - - } // namespace NNUE - -} // 
namespace Eval -#endif // K_P_CR_256X2_32_32_H diff --git a/src/eval/nnue/architectures/k-p_256x2-32-32.h b/src/eval/nnue/architectures/k-p_256x2-32-32.h deleted file mode 100644 index 00b14d47..00000000 --- a/src/eval/nnue/architectures/k-p_256x2-32-32.h +++ /dev/null @@ -1,38 +0,0 @@ -// Definition of input features and network structure used in NNUE evaluation function -#ifndef K_P_256X2_32_32_H -#define K_P_256X2_32_32_H - -#include "../features/feature_set.h" -#include "../features/k.h" -#include "../features/p.h" - -#include "../layers/input_slice.h" -#include "../layers/affine_transform.h" -#include "../layers/clipped_relu.h" - -namespace Eval { - -namespace NNUE { - -// Input features used in evaluation function -using RawFeatures = Features::FeatureSet; - -// Number of input feature dimensions after conversion -constexpr IndexType kTransformedFeatureDimensions = 256; - -namespace Layers { - -// define network structure -using InputLayer = InputSlice; -using HiddenLayer1 = ClippedReLU>; -using HiddenLayer2 = ClippedReLU>; -using OutputLayer = AffineTransform; - -} // namespace Layers - -using Network = Layers::OutputLayer; - -} // namespace NNUE - -} // namespace Eval -#endif // K_P_256X2_32_32_H diff --git a/src/eval/nnue/evaluate_nnue.cpp b/src/eval/nnue/evaluate_nnue.cpp deleted file mode 100644 index 55e627d0..00000000 --- a/src/eval/nnue/evaluate_nnue.cpp +++ /dev/null @@ -1,326 +0,0 @@ -// Code for calculating NNUE evaluation function - -#if defined(EVAL_NNUE) - -#include -#include - -#include "../../evaluate.h" -#include "../../position.h" -#include "../../misc.h" -#include "../../uci.h" - -#include "evaluate_nnue.h" - -namespace Eval { - -namespace NNUE { - -// Input feature converter -AlignedPtr feature_transformer; - -// Evaluation function -AlignedPtr network; - -// Evaluation function file name -std::string fileName = "nn.bin"; - -// Saved evaluation function file name -std::string savedfileName = "nn.bin"; - -// Get a string that represents the 
structure of the evaluation function -std::string GetArchitectureString() { - return "Features=" + FeatureTransformer::GetStructureString() + - ",Network=" + Network::GetStructureString(); -} - -namespace { - -namespace Detail { - -// Initialize the evaluation function parameters -template -void Initialize(AlignedPtr& pointer) { - pointer.reset(reinterpret_cast(aligned_malloc(sizeof(T), alignof(T)))); - std::memset(pointer.get(), 0, sizeof(T)); -} - -// read evaluation function parameters -template -bool ReadParameters(std::istream& stream, const AlignedPtr& pointer) { - std::uint32_t header; - stream.read(reinterpret_cast(&header), sizeof(header)); - if (!stream || header != T::GetHashValue()) return false; - return pointer->ReadParameters(stream); -} - -// write evaluation function parameters -template -bool WriteParameters(std::ostream& stream, const AlignedPtr& pointer) { - constexpr std::uint32_t header = T::GetHashValue(); - stream.write(reinterpret_cast(&header), sizeof(header)); - return pointer->WriteParameters(stream); -} - -} // namespace Detail - -// Initialize the evaluation function parameters -void Initialize() { - Detail::Initialize(feature_transformer); - Detail::Initialize(network); -} - -} // namespace - -// read the header -bool ReadHeader(std::istream& stream, - std::uint32_t* hash_value, std::string* architecture) { - std::uint32_t version, size; - stream.read(reinterpret_cast(&version), sizeof(version)); - stream.read(reinterpret_cast(hash_value), sizeof(*hash_value)); - stream.read(reinterpret_cast(&size), sizeof(size)); - if (!stream || version != kVersion) return false; - architecture->resize(size); - stream.read(&(*architecture)[0], size); - return !stream.fail(); -} - -// write the header -bool WriteHeader(std::ostream& stream, - std::uint32_t hash_value, const std::string& architecture) { - stream.write(reinterpret_cast(&kVersion), sizeof(kVersion)); - stream.write(reinterpret_cast(&hash_value), sizeof(hash_value)); - const 
std::uint32_t size = static_cast(architecture.size()); - stream.write(reinterpret_cast(&size), sizeof(size)); - stream.write(architecture.data(), size); - return !stream.fail(); -} - -// read evaluation function parameters -bool ReadParameters(std::istream& stream) { - std::uint32_t hash_value; - std::string architecture; - if (!ReadHeader(stream, &hash_value, &architecture)) return false; - if (hash_value != kHashValue) return false; - if (!Detail::ReadParameters(stream, feature_transformer)) return false; - if (!Detail::ReadParameters(stream, network)) return false; - return stream && stream.peek() == std::ios::traits_type::eof(); -} - -// write evaluation function parameters -bool WriteParameters(std::ostream& stream) { - if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false; - if (!Detail::WriteParameters(stream, feature_transformer)) return false; - if (!Detail::WriteParameters(stream, network)) return false; - return !stream.fail(); -} - -// proceed if you can calculate the difference -static void UpdateAccumulatorIfPossible(const Position& pos) { - feature_transformer->UpdateAccumulatorIfPossible(pos); -} - -// Calculate the evaluation value -static Value ComputeScore(const Position& pos, bool refresh = false) { - auto& accumulator = pos.state()->accumulator; - if (!refresh && accumulator.computed_score) { - return accumulator.score; - } - - alignas(kCacheLineSize) TransformedFeatureType - transformed_features[FeatureTransformer::kBufferSize]; - feature_transformer->Transform(pos, transformed_features, refresh); - alignas(kCacheLineSize) char buffer[Network::kBufferSize]; - const auto output = network->Propagate(transformed_features, buffer); - - // When a value larger than VALUE_MAX_EVAL is returned, aspiration search fails high - // It should be guaranteed that it is less than VALUE_MAX_EVAL because the search will not end. 
- - // Even if this phenomenon occurs, if the seconds are fixed when playing, the search will be aborted there, so - // The best move in the previous iteration is pointed to as bestmove, so apparently - // no problem. The situation in which this VALUE_MAX_EVAL is returned is almost at a dead end, - // Since such a jamming phase often appears at the end, there is a big difference in the situation - // Doesn't really affect the outcome. - - // However, when searching with a fixed depth such as when creating a teacher, it will not return from the search - // Waste the computation time for that thread. Also, it will be timed out with fixed depth game. - - auto score = static_cast(output[0] / FV_SCALE); - - // 1) I feel that if I clip too poorly, it will have an effect on my learning... - // 2) Since accumulator.score is not used at the time of difference calculation, it can be rewritten without any problem. - score = Math::clamp(score , -VALUE_MAX_EVAL , VALUE_MAX_EVAL); - - accumulator.score = score; - accumulator.computed_score = true; - return accumulator.score; -} - -} // namespace NNUE - -#if defined(USE_EVAL_HASH) -// Class used to store evaluation values ​​in HashTable -struct alignas(16) ScoreKeyValue { -#if defined(USE_SSE2) - ScoreKeyValue() = default; - ScoreKeyValue(const ScoreKeyValue& other) { - static_assert(sizeof(ScoreKeyValue) == sizeof(__m128i), - "sizeof(ScoreKeyValue) should be equal to sizeof(__m128i)"); - _mm_store_si128(&as_m128i, other.as_m128i); - } - ScoreKeyValue& operator=(const ScoreKeyValue& other) { - _mm_store_si128(&as_m128i, other.as_m128i); - return *this; - } -#endif - - // It is necessary to be able to operate atomically with evaluate hash, so the manipulator for that - void encode() { -#if defined(USE_SSE2) - // ScoreKeyValue is copied to atomic, so if the key matches, the data matches. -#else - key ^= score; -#endif - } - // decode() is the reverse conversion of encode(), but since it is xor, the reverse conversion is the same. 
- void decode() { encode(); } - - union { - struct { - std::uint64_t key; - std::uint64_t score; - }; -#if defined(USE_SSE2) - __m128i as_m128i; -#endif - }; -}; - -// Simple HashTable implementation. -// Size is a power of 2. -template -struct HashTable { - HashTable() { clear(); } - T* operator [] (const Key k) { return entries_ + (static_cast(k) & (Size - 1)); } - void clear() { memset(entries_, 0, sizeof(T)*Size); } - - // Check that Size is a power of 2 - static_assert((Size & (Size - 1)) == 0, ""); - - private: - T entries_[Size]; -}; - -//HashTable to save the evaluated ones (following ehash) - -#if !defined(USE_LARGE_EVAL_HASH) -// 134MB (setting other than witch's AVX2) -struct EvaluateHashTable : HashTable {}; -#else -// If you have prefetch, it's better to have a big one... -// → It doesn't change much and the memory is wasteful, so is it okay to set ↑ by default? -// 1GB (setting for witch's AVX2) -struct EvaluateHashTable : HashTable {}; -#endif - -EvaluateHashTable g_evalTable; - -// Prepare a function to prefetch. -void prefetch_evalhash(const Key key) { - constexpr auto mask = ~((uint64_t)0x1f); - prefetch((void*)((uint64_t)g_evalTable[key] & mask)); -} -#endif - -// read the evaluation function file -// Save and restore Options with bench command etc., so EvalDir is changed at this time, -// This function may be called twice to flag that the evaluation function needs to be reloaded. -void load_eval() { - - // Must be done! - NNUE::Initialize(); - - if (Options["SkipLoadingEval"]) - { - std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl; - return; - } - - const std::string file_name = Options["EvalFile"]; - NNUE::fileName = file_name; - - std::ifstream stream(file_name, std::ios::binary); - const bool result = NNUE::ReadParameters(stream); - - if (!result) - // It's a problem if it doesn't finish when there is a read error. - std::cout << "Error! 
" << NNUE::fileName << " not found or wrong format" << std::endl; - - else - std::cout << "info string NNUE " << NNUE::fileName << " found & loaded" << std::endl; -} - -// Initialization -void init() { -} - -// Evaluation function. Perform full calculation instead of difference calculation. -// Called only once with Position::set(). (The difference calculation after that) -// Note that the evaluation value seen from the turn side is returned. (Design differs from other evaluation functions in this respect) -// Since, we will not try to optimize this function. -Value compute_eval(const Position& pos) { - return NNUE::ComputeScore(pos, true); -} - -// Evaluation function -Value evaluate(const Position& pos) { - const auto& accumulator = pos.state()->accumulator; - if (accumulator.computed_score) { - return accumulator.score; - } - -#if defined(USE_GLOBAL_OPTIONS) - // If Global Options is set not to use eval hash - // Skip the query to the eval hash. - if (!GlobalOptions.use_eval_hash) { - ASSERT_LV5(pos.state()->materialValue == Eval::material(pos)); - return NNUE::ComputeScore(pos); - } -#endif - -#if defined(USE_EVAL_HASH) - // May be in the evaluate hash table. - const Key key = pos.key(); - ScoreKeyValue entry = *g_evalTable[key]; - entry.decode(); - if (entry.key == key) { - // there were! - return Value(entry.score); - } -#endif - - Value score = NNUE::ComputeScore(pos); -#if defined(USE_EVAL_HASH) - // Since it was calculated carefully, save it in the evaluate hash table. 
- entry.key = key; - entry.score = score; - entry.encode(); - *g_evalTable[key] = entry; -#endif - - return score; -} - -// proceed if you can calculate the difference -void evaluate_with_no_return(const Position& pos) { - NNUE::UpdateAccumulatorIfPossible(pos); -} - -// display the breakdown of the evaluation value of the current phase -void print_eval_stat(Position& /*pos*/) { - std::cout << "--- EVAL STAT: not implemented" << std::endl; -} - -} // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/eval/nnue/evaluate_nnue.h b/src/eval/nnue/evaluate_nnue.h deleted file mode 100644 index d474a8ae..00000000 --- a/src/eval/nnue/evaluate_nnue.h +++ /dev/null @@ -1,67 +0,0 @@ -// header used in NNUE evaluation function - -#ifndef _EVALUATE_NNUE_H_ -#define _EVALUATE_NNUE_H_ - -#if defined(EVAL_NNUE) - -#include "nnue_feature_transformer.h" -#include "nnue_architecture.h" - -#include - -namespace Eval { - -namespace NNUE { - -// hash value of evaluation function structure -constexpr std::uint32_t kHashValue = - FeatureTransformer::GetHashValue() ^ Network::GetHashValue(); - -// Deleter for automating release of memory area -template -struct AlignedDeleter { - void operator()(T* ptr) const { - ptr->~T(); - aligned_free(ptr); - } -}; -template -using AlignedPtr = std::unique_ptr>; - -// Input feature converter -extern AlignedPtr feature_transformer; - -// Evaluation function -extern AlignedPtr network; - -// Evaluation function file name -extern std::string fileName; - -// Saved evaluation function file name -extern std::string savedfileName; - -// Get a string that represents the structure of the evaluation function -std::string GetArchitectureString(); - -// read the header -bool ReadHeader(std::istream& stream, - std::uint32_t* hash_value, std::string* architecture); - -// write the header -bool WriteHeader(std::ostream& stream, - std::uint32_t hash_value, const std::string& architecture); - -// read evaluation function parameters -bool 
ReadParameters(std::istream& stream); - -// write evaluation function parameters -bool WriteParameters(std::ostream& stream); - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/evaluate_nnue_learner.cpp b/src/eval/nnue/evaluate_nnue_learner.cpp deleted file mode 100644 index 3297037d..00000000 --- a/src/eval/nnue/evaluate_nnue_learner.cpp +++ /dev/null @@ -1,231 +0,0 @@ -// Code for learning NNUE evaluation function - -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#include -#include - -#include "../../learn/learn.h" -#include "../../learn/learning_tools.h" - -#include "../../position.h" -#include "../../uci.h" -#include "../../misc.h" -#include "../../thread_win32_osx.h" - -#include "../evaluate_common.h" - -#include "evaluate_nnue.h" -#include "evaluate_nnue_learner.h" -#include "trainer/features/factorizer_feature_set.h" -#include "trainer/features/factorizer_half_kp.h" -#include "trainer/trainer_feature_transformer.h" -#include "trainer/trainer_input_slice.h" -#include "trainer/trainer_affine_transform.h" -#include "trainer/trainer_clipped_relu.h" -#include "trainer/trainer_sum.h" - -namespace Eval { - -namespace NNUE { - -namespace { - -// learning data -std::vector examples; - -// Mutex for exclusive control of examples -std::mutex examples_mutex; - -// number of samples in mini-batch -uint64_t batch_size; - -// random number generator -std::mt19937 rng; - -// learner -std::shared_ptr> trainer; - -// Learning rate scale -double global_learning_rate_scale; - -// Get the learning rate scale -double GetGlobalLearningRateScale() { - return global_learning_rate_scale; -} - -// Tell the learner options such as hyperparameters -void SendMessages(std::vector messages) { - for (auto& message : messages) { - trainer->SendMessage(&message); - assert(message.num_receivers > 0); - } -} - -} // namespace - -// Initialize learning -void InitializeTraining(double eta1, uint64_t eta1_epoch, - double eta2, 
uint64_t eta2_epoch, double eta3) { - std::cout << "Initializing NN training for " - << GetArchitectureString() << std::endl; - - assert(feature_transformer); - assert(network); - trainer = Trainer::Create(network.get(), feature_transformer.get()); - - if (Options["SkipLoadingEval"]) { - trainer->Initialize(rng); - } - - global_learning_rate_scale = 1.0; - EvalLearningTools::Weight::init_eta(eta1, eta2, eta3, eta1_epoch, eta2_epoch); -} - -// set the number of samples in the mini-batch -void SetBatchSize(uint64_t size) { - assert(size > 0); - batch_size = size; -} - -// set the learning rate scale -void SetGlobalLearningRateScale(double scale) { - global_learning_rate_scale = scale; -} - -// Set options such as hyperparameters -void SetOptions(const std::string& options) { - std::vector messages; - for (const auto& option : Split(options, ',')) { - const auto fields = Split(option, '='); - assert(fields.size() == 1 || fields.size() == 2); - if (fields.size() == 1) { - messages.emplace_back(fields[0]); - } else { - messages.emplace_back(fields[0], fields[1]); - } - } - SendMessages(std::move(messages)); -} - -// Reread the evaluation function parameters for learning from the file -void RestoreParameters(const std::string& dir_name) { - const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName); - std::ifstream stream(file_name, std::ios::binary); - bool result = ReadParameters(stream); - assert(result); - - SendMessages({{"reset"}}); -} - -// Add 1 sample of learning data -void AddExample(Position& pos, Color rootColor, - const Learner::PackedSfenValue& psv, double weight) { - Example example; - if (rootColor == pos.side_to_move()) { - example.sign = 1; - } else { - example.sign = -1; - } - example.psv = psv; - example.weight = weight; - - Features::IndexList active_indices[2]; - for (const auto trigger : kRefreshTriggers) { - RawFeatures::AppendActiveIndices(pos, trigger, active_indices); - } - if (pos.side_to_move() != WHITE) { - 
active_indices[0].swap(active_indices[1]); - } - for (const auto color : Colors) { - std::vector training_features; - for (const auto base_index : active_indices[color]) { - static_assert(Features::Factorizer::GetDimensions() < - (1 << TrainingFeature::kIndexBits), ""); - Features::Factorizer::AppendTrainingFeatures( - base_index, &training_features); - } - std::sort(training_features.begin(), training_features.end()); - - auto& unique_features = example.training_features[color]; - for (const auto& feature : training_features) { - if (!unique_features.empty() && - feature.GetIndex() == unique_features.back().GetIndex()) { - unique_features.back() += feature; - } else { - unique_features.push_back(feature); - } - } - } - - std::lock_guard lock(examples_mutex); - examples.push_back(std::move(example)); -} - -// update the evaluation function parameters -void UpdateParameters(uint64_t epoch) { - assert(batch_size > 0); - - EvalLearningTools::Weight::calc_eta(epoch); - const auto learning_rate = static_cast( - get_eta() / batch_size); - - std::lock_guard lock(examples_mutex); - std::shuffle(examples.begin(), examples.end(), rng); - while (examples.size() >= batch_size) { - std::vector batch(examples.end() - batch_size, examples.end()); - examples.resize(examples.size() - batch_size); - - const auto network_output = trainer->Propagate(batch); - - std::vector gradients(batch.size()); - for (std::size_t b = 0; b < batch.size(); ++b) { - const auto shallow = static_cast(Round( - batch[b].sign * network_output[b] * kPonanzaConstant)); - const auto& psv = batch[b].psv; - const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv); - gradients[b] = static_cast(gradient * batch[b].weight); - } - - trainer->Backpropagate(gradients.data(), learning_rate); - } - SendMessages({{"quantize_parameters"}}); -} - -// Check if there are any problems with learning -void CheckHealth() { - SendMessages({{"check_health"}}); -} - -} // namespace NNUE - -// save merit function 
parameters to a file -void save_eval(std::string dir_name) { - auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name); - std::cout << "save_eval() start. folder = " << eval_dir << std::endl; - - // mkdir() will fail if this folder already exists, but - // Apart from that. If not, I just want you to make it. - // Also, assume that the folders up to EvalSaveDir have been dug. - Dependency::mkdir(eval_dir); - - if (Options["SkipLoadingEval"] && NNUE::trainer) { - NNUE::SendMessages({{"clear_unobserved_feature_weights"}}); - } - - const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName); - std::ofstream stream(file_name, std::ios::binary); - const bool result = NNUE::WriteParameters(stream); - assert(result); - - std::cout << "save_eval() finished. folder = " << eval_dir << std::endl; -} - -// get the current eta -double get_eta() { - return NNUE::GetGlobalLearningRateScale() * EvalLearningTools::Weight::eta; -} - -} // namespace Eval - -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) diff --git a/src/eval/nnue/evaluate_nnue_learner.h b/src/eval/nnue/evaluate_nnue_learner.h deleted file mode 100644 index ace66524..00000000 --- a/src/eval/nnue/evaluate_nnue_learner.h +++ /dev/null @@ -1,46 +0,0 @@ -// Interface used for learning NNUE evaluation function - -#ifndef _EVALUATE_NNUE_LEARNER_H_ -#define _EVALUATE_NNUE_LEARNER_H_ - -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#include "../../learn/learn.h" - -namespace Eval { - -namespace NNUE { - -// Initialize learning -void InitializeTraining(double eta1, uint64_t eta1_epoch, - double eta2, uint64_t eta2_epoch, double eta3); - -// set the number of samples in the mini-batch -void SetBatchSize(uint64_t size); - -// set the learning rate scale -void SetGlobalLearningRateScale(double scale); - -// Set options such as hyperparameters -void SetOptions(const std::string& options); - -// Reread the evaluation function parameters for learning from the file -void RestoreParameters(const 
std::string& dir_name); - -// Add 1 sample of learning data -void AddExample(Position& pos, Color rootColor, - const Learner::PackedSfenValue& psv, double weight); - -// update the evaluation function parameters -void UpdateParameters(uint64_t epoch); - -// Check if there are any problems with learning -void CheckHealth(); - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/features/castling_right.cpp b/src/eval/nnue/features/castling_right.cpp deleted file mode 100644 index ee7b6576..00000000 --- a/src/eval/nnue/features/castling_right.cpp +++ /dev/null @@ -1,73 +0,0 @@ -//Definition of input feature quantity K of NNUE evaluation function - -#if defined(EVAL_NNUE) - -#include "castling_right.h" -#include "index_list.h" - -namespace Eval { - - namespace NNUE { - - namespace Features { - - // Get a list of indices with a value of 1 among the features - void CastlingRight::AppendActiveIndices( - const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - int castling_rights = pos.state()->castlingRights; - int relative_castling_rights; - if (perspective == WHITE) { - relative_castling_rights = castling_rights; - } - else { - // Invert the perspective. 
- relative_castling_rights = ((castling_rights & 3) << 2) - & ((castling_rights >> 2) & 3); - } - - for (int i = 0; i push_back(i); - } - } - } - - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - void CastlingRight::AppendChangedIndices( - const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { - - int previous_castling_rights = pos.state()->previous->castlingRights; - int current_castling_rights = pos.state()->castlingRights; - int relative_previous_castling_rights; - int relative_current_castling_rights; - if (perspective == WHITE) { - relative_previous_castling_rights = previous_castling_rights; - relative_current_castling_rights = current_castling_rights; - } - else { - // Invert the perspective. - relative_previous_castling_rights = ((previous_castling_rights & 3) << 2) - & ((previous_castling_rights >> 2) & 3); - relative_current_castling_rights = ((current_castling_rights & 3) << 2) - & ((current_castling_rights >> 2) & 3); - } - - for (int i = 0; i < kDimensions; ++i) { - if ((relative_previous_castling_rights & (i << 1)) && - (relative_current_castling_rights & (i << 1)) == 0) { - removed->push_back(i); - } - } - } - - } // namespace Features - - } // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/eval/nnue/features/castling_right.h b/src/eval/nnue/features/castling_right.h deleted file mode 100644 index 709d4688..00000000 --- a/src/eval/nnue/features/castling_right.h +++ /dev/null @@ -1,48 +0,0 @@ -//Definition of input feature quantity K of NNUE evaluation function - -#ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_ -#define _NNUE_FEATURES_CASTLING_RIGHT_H_ - -#if defined(EVAL_NNUE) - -#include "../../../evaluate.h" -#include "features_common.h" - -namespace Eval { - - namespace NNUE { - - namespace Features { - - // Feature K: Ball position - class CastlingRight { - public: - // feature quantity name - static constexpr const char* kName = 
"CastlingRight"; - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = 0x913968AAu; - // number of feature dimensions - static constexpr IndexType kDimensions = 4; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = 4; - // Timing of full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone; - - // Get a list of indices with a value of 1 among the features - static void AppendActiveIndices(const Position& pos, Color perspective, - IndexList* active); - - // Get a list of indices whose values ??have changed from the previous one in the feature quantity - static void AppendChangedIndices(const Position& pos, Color perspective, - IndexList* removed, IndexList* added); - }; - - } // namespace Features - - } // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/features/enpassant.cpp b/src/eval/nnue/features/enpassant.cpp deleted file mode 100644 index 82a4158e..00000000 --- a/src/eval/nnue/features/enpassant.cpp +++ /dev/null @@ -1,47 +0,0 @@ -//Definition of input feature quantity K of NNUE evaluation function - -#if defined(EVAL_NNUE) - -#include "enpassant.h" -#include "index_list.h" - -namespace Eval { - - namespace NNUE { - - namespace Features { - - // Get a list of indices with a value of 1 among the features - void EnPassant::AppendActiveIndices( - const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - auto epSquare = pos.state()->epSquare; - if (epSquare == SQ_NONE) { - return; - } - - if (perspective == BLACK) { - epSquare = Inv(epSquare); - } - - auto file = file_of(epSquare); - active->push_back(file); - } - - // Get a list of indices whose 
values ??have changed from the previous one in the feature quantity - void EnPassant::AppendChangedIndices( - const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { - // Not implemented. - assert(false); - } - - } // namespace Features - - } // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/eval/nnue/features/enpassant.h b/src/eval/nnue/features/enpassant.h deleted file mode 100644 index 51880bb4..00000000 --- a/src/eval/nnue/features/enpassant.h +++ /dev/null @@ -1,48 +0,0 @@ -//Definition of input feature quantity K of NNUE evaluation function - -#ifndef _NNUE_FEATURES_ENPASSANT_H_ -#define _NNUE_FEATURES_ENPASSANT_H_ - -#if defined(EVAL_NNUE) - -#include "../../../evaluate.h" -#include "features_common.h" - -namespace Eval { - - namespace NNUE { - - namespace Features { - - // Feature K: Ball position - class EnPassant { - public: - // feature quantity name - static constexpr const char* kName = "EnPassant"; - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = 0x02924F91u; - // number of feature dimensions - static constexpr IndexType kDimensions = 8; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = 1; - // Timing of full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved; - - // Get a list of indices with a value of 1 among the features - static void AppendActiveIndices(const Position& pos, Color perspective, - IndexList* active); - - // Get a list of indices whose values ??have changed from the previous one in the feature quantity - static void AppendChangedIndices(const Position& pos, Color perspective, - IndexList* removed, IndexList* added); - }; - - } // namespace Features - - } // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif 
diff --git a/src/eval/nnue/features/feature_set.h b/src/eval/nnue/features/feature_set.h deleted file mode 100644 index 0430ebfe..00000000 --- a/src/eval/nnue/features/feature_set.h +++ /dev/null @@ -1,249 +0,0 @@ -// A class template that represents the input feature set of the NNUE evaluation function - -#ifndef _NNUE_FEATURE_SET_H_ -#define _NNUE_FEATURE_SET_H_ - -#if defined(EVAL_NNUE) - -#include "features_common.h" -#include - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// A class template that represents a list of values -template -struct CompileTimeList; -template -struct CompileTimeList { - static constexpr bool Contains(T value) { - return value == First || CompileTimeList::Contains(value); - } - static constexpr std::array - kValues = {{First, Remaining...}}; -}; -template -constexpr std::array - CompileTimeList::kValues; -template -struct CompileTimeList { - static constexpr bool Contains(T /*value*/) { - return false; - } - static constexpr std::array kValues = {{}}; -}; - -// Class template that adds to the beginning of the list -template -struct AppendToList; -template -struct AppendToList, AnotherValue> { - using Result = CompileTimeList; -}; - -// Class template for adding to a sorted, unique list -template -struct InsertToSet; -template -struct InsertToSet, AnotherValue> { - using Result = std::conditional_t< - CompileTimeList::Contains(AnotherValue), - CompileTimeList, - std::conditional_t<(AnotherValue , - typename AppendToList, AnotherValue>::Result, - First>::Result>>; -}; -template -struct InsertToSet, Value> { - using Result = CompileTimeList; -}; - -// Base class of feature set -template -class FeatureSetBase { - public: - // Get a list of indices with a value of 1 among the features - template - static void AppendActiveIndices( - const Position& pos, TriggerEvent trigger, IndexListType active[2]) { - for (const auto perspective :Colors) { - Derived::CollectActiveIndices( - pos, trigger, perspective, 
&active[perspective]); - } - } - - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - template - static void AppendChangedIndices( - const PositionType& pos, TriggerEvent trigger, - IndexListType removed[2], IndexListType added[2], bool reset[2]) { - const auto& dp = pos.state()->dirtyPiece; - if (dp.dirty_num == 0) return; - - for (const auto perspective :Colors) { - reset[perspective] = false; - switch (trigger) { - case TriggerEvent::kNone: - break; - case TriggerEvent::kFriendKingMoved: - reset[perspective] = - dp.pieceNo[0] == PIECE_NUMBER_KING + perspective; - break; - case TriggerEvent::kEnemyKingMoved: - reset[perspective] = - dp.pieceNo[0] == PIECE_NUMBER_KING + ~perspective; - break; - case TriggerEvent::kAnyKingMoved: - reset[perspective] = dp.pieceNo[0] >= PIECE_NUMBER_KING; - break; - case TriggerEvent::kAnyPieceMoved: - reset[perspective] = true; - break; - default: - assert(false); - break; - } - if (reset[perspective]) { - Derived::CollectActiveIndices( - pos, trigger, perspective, &added[perspective]); - } else { - Derived::CollectChangedIndices( - pos, trigger, perspective, - &removed[perspective], &added[perspective]); - } - } - } -}; - -// Class template that represents the feature set -// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime -template -class FeatureSet : - public FeatureSetBase< - FeatureSet> { - private: - using Head = FirstFeatureType; - using Tail = FeatureSet; - - public: - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = - Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31); - // number of feature dimensions - static constexpr IndexType kDimensions = - Head::kDimensions + Tail::kDimensions; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType 
kMaxActiveDimensions = - Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions; - // List of timings to perform all calculations instead of difference calculation - using SortedTriggerSet = typename InsertToSet::Result; - static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues; - - // Get the feature quantity name - static std::string GetName() { - return std::string(Head::kName) + "+" + Tail::GetName(); - } - - private: - // Get a list of indices with a value of 1 among the features - template - static void CollectActiveIndices( - const Position& pos, const TriggerEvent trigger, const Color perspective, - IndexListType* const active) { - Tail::CollectActiveIndices(pos, trigger, perspective, active); - if (Head::kRefreshTrigger == trigger) { - const auto start = active->size(); - Head::AppendActiveIndices(pos, perspective, active); - for (auto i = start; i < active->size(); ++i) { - (*active)[i] += Tail::kDimensions; - } - } - } - - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - template - static void CollectChangedIndices( - const Position& pos, const TriggerEvent trigger, const Color perspective, - IndexListType* const removed, IndexListType* const added) { - Tail::CollectChangedIndices(pos, trigger, perspective, removed, added); - if (Head::kRefreshTrigger == trigger) { - const auto start_removed = removed->size(); - const auto start_added = added->size(); - Head::AppendChangedIndices(pos, perspective, removed, added); - for (auto i = start_removed; i < removed->size(); ++i) { - (*removed)[i] += Tail::kDimensions; - } - for (auto i = start_added; i < added->size(); ++i) { - (*added)[i] += Tail::kDimensions; - } - } - } - - // Make the base class and the class template that recursively uses itself a friend - friend class FeatureSetBase; - template - friend class FeatureSet; -}; - -// Class template that represents the feature set -// Specialization with one template argument -template -class 
FeatureSet : public FeatureSetBase> { - public: - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = FeatureType::kHashValue; - // number of feature dimensions - static constexpr IndexType kDimensions = FeatureType::kDimensions; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = - FeatureType::kMaxActiveDimensions; - // List of timings to perform all calculations instead of difference calculation - using SortedTriggerSet = - CompileTimeList; - static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues; - - // Get the feature quantity name - static std::string GetName() { - return FeatureType::kName; - } - - private: - // Get a list of indices with a value of 1 among the features - static void CollectActiveIndices( - const Position& pos, const TriggerEvent trigger, const Color perspective, - IndexList* const active) { - if (FeatureType::kRefreshTrigger == trigger) { - FeatureType::AppendActiveIndices(pos, perspective, active); - } - } - - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - static void CollectChangedIndices( - const Position& pos, const TriggerEvent trigger, const Color perspective, - IndexList* const removed, IndexList* const added) { - if (FeatureType::kRefreshTrigger == trigger) { - FeatureType::AppendChangedIndices(pos, perspective, removed, added); - } - } - - // Make the base class and the class template that recursively uses itself a friend - friend class FeatureSetBase; - template - friend class FeatureSet; -}; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/features/features_common.h b/src/eval/nnue/features/features_common.h deleted file mode 100644 index 8d2ca4a2..00000000 --- a/src/eval/nnue/features/features_common.h +++ /dev/null @@ 
-1,47 +0,0 @@ -//Common header of input features of NNUE evaluation function - -#ifndef _NNUE_FEATURES_COMMON_H_ -#define _NNUE_FEATURES_COMMON_H_ - -#if defined(EVAL_NNUE) - -#include "../../../evaluate.h" -#include "../nnue_common.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Index list type -class IndexList; - -// Class template that represents the feature set -template -class FeatureSet; - -// Type of timing to perform all calculations instead of difference calculation -enum class TriggerEvent { - kNone, // Calculate the difference whenever possible - kFriendKingMoved, // calculate all when own ball moves - kEnemyKingMoved, // do all calculations when enemy balls move - kAnyKingMoved, // do all calculations if either ball moves - kAnyPieceMoved, // always do all calculations -}; - -// turn side or other side -enum class Side { - kFriend, // turn side - kEnemy, // opponent -}; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/features/half_kp.cpp b/src/eval/nnue/features/half_kp.cpp deleted file mode 100644 index cba2c9cd..00000000 --- a/src/eval/nnue/features/half_kp.cpp +++ /dev/null @@ -1,84 +0,0 @@ -//Definition of input features HalfKP of NNUE evaluation function - -#if defined(EVAL_NNUE) - -#include "half_kp.h" -#include "index_list.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Find the index of the feature quantity from the ball position and BonaPiece -template -inline IndexType HalfKP::MakeIndex(Square sq_k, BonaPiece p) { - return static_cast(fe_end) * static_cast(sq_k) + p; -} - -// Get the piece information -template -inline void HalfKP::GetPieces( - const Position& pos, Color perspective, - BonaPiece** pieces, Square* sq_target_k) { - *pieces = (perspective == BLACK) ? - pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - const PieceNumber target = (AssociatedKing == Side::kFriend) ? 
- static_cast(PIECE_NUMBER_KING + perspective) : - static_cast(PIECE_NUMBER_KING + ~perspective); - *sq_target_k = static_cast(((*pieces)[target] - f_king) % SQUARE_NB); -} - -// Get a list of indices with a value of 1 among the features -template -void HalfKP::AppendActiveIndices( - const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - BonaPiece* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); - for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) { - if (pieces[i] != Eval::BONA_PIECE_ZERO) { - active->push_back(MakeIndex(sq_target_k, pieces[i])); - } - } -} - -// Get a list of indices whose values ​​have changed from the previous one in the feature quantity -template -void HalfKP::AppendChangedIndices( - const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { - BonaPiece* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); - const auto& dp = pos.state()->dirtyPiece; - for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue; - const auto old_p = static_cast( - dp.changed_piece[i].old_piece.from[perspective]); - if (old_p != Eval::BONA_PIECE_ZERO) { - removed->push_back(MakeIndex(sq_target_k, old_p)); - } - const auto new_p = static_cast( - dp.changed_piece[i].new_piece.from[perspective]); - if (new_p != Eval::BONA_PIECE_ZERO) { - added->push_back(MakeIndex(sq_target_k, new_p)); - } - } -} - -template class HalfKP; -template class HalfKP; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/eval/nnue/features/half_kp.h b/src/eval/nnue/features/half_kp.h deleted file mode 100644 index cc9cd660..00000000 --- a/src/eval/nnue/features/half_kp.h +++ /dev/null @@ -1,62 +0,0 @@ -//Definition of input features HalfKP 
of NNUE evaluation function - -#ifndef _NNUE_FEATURES_HALF_KP_H_ -#define _NNUE_FEATURES_HALF_KP_H_ - -#if defined(EVAL_NNUE) - -#include "../../../evaluate.h" -#include "features_common.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Feature HalfKP: Combination of the position of own ball or enemy ball and the position of pieces other than balls -template -class HalfKP { - public: - // feature quantity name - static constexpr const char* kName = - (AssociatedKing == Side::kFriend) ? "HalfKP(Friend)" : "HalfKP(Enemy)"; - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = - 0x5D69D5B9u ^ (AssociatedKing == Side::kFriend); - // number of feature dimensions - static constexpr IndexType kDimensions = - static_cast(SQUARE_NB) * static_cast(fe_end); - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING; - // Timing of full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = - (AssociatedKing == Side::kFriend) ? 
- TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved; - - // Get a list of indices with a value of 1 among the features - static void AppendActiveIndices(const Position& pos, Color perspective, - IndexList* active); - - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - static void AppendChangedIndices(const Position& pos, Color perspective, - IndexList* removed, IndexList* added); - - // Find the index of the feature quantity from the ball position and BonaPiece - static IndexType MakeIndex(Square sq_k, BonaPiece p); - - private: - // Get the piece information - static void GetPieces(const Position& pos, Color perspective, - BonaPiece** pieces, Square* sq_target_k); -}; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/features/half_relative_kp.cpp b/src/eval/nnue/features/half_relative_kp.cpp deleted file mode 100644 index 623b839c..00000000 --- a/src/eval/nnue/features/half_relative_kp.cpp +++ /dev/null @@ -1,97 +0,0 @@ -//Definition of input features HalfRelativeKP of NNUE evaluation function - -#if defined(EVAL_NNUE) - -#include "half_relative_kp.h" -#include "index_list.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Find the index of the feature quantity from the ball position and BonaPiece -template -inline IndexType HalfRelativeKP::MakeIndex( - Square sq_k, BonaPiece p) { - constexpr IndexType W = kBoardWidth; - constexpr IndexType H = kBoardHeight; - const IndexType piece_index = (p - fe_hand_end) / SQUARE_NB; - const Square sq_p = static_cast((p - fe_hand_end) % SQUARE_NB); - const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2); - const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2); - return H * W * piece_index + H * relative_file + relative_rank; -} - -// Get the piece information -template -inline void HalfRelativeKP::GetPieces( - const 
Position& pos, Color perspective, - BonaPiece** pieces, Square* sq_target_k) { - *pieces = (perspective == BLACK) ? - pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - const PieceNumber target = (AssociatedKing == Side::kFriend) ? - static_cast(PIECE_NUMBER_KING + perspective) : - static_cast(PIECE_NUMBER_KING + ~perspective); - *sq_target_k = static_cast(((*pieces)[target] - f_king) % SQUARE_NB); -} - -// Get a list of indices with a value of 1 among the features -template -void HalfRelativeKP::AppendActiveIndices( - const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - BonaPiece* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); - for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) { - if (pieces[i] >= fe_hand_end) { - if (pieces[i] != Eval::BONA_PIECE_ZERO) { - active->push_back(MakeIndex(sq_target_k, pieces[i])); - } - } - } -} - -// Get a list of indices whose values ​​have changed from the previous one in the feature quantity -template -void HalfRelativeKP::AppendChangedIndices( - const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { - BonaPiece* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); - const auto& dp = pos.state()->dirtyPiece; - for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue; - const auto old_p = static_cast( - dp.changed_piece[i].old_piece.from[perspective]); - if (old_p >= fe_hand_end) { - if (old_p != Eval::BONA_PIECE_ZERO) { - removed->push_back(MakeIndex(sq_target_k, old_p)); - } - } - const auto new_p = static_cast( - dp.changed_piece[i].new_piece.from[perspective]); - if (new_p >= fe_hand_end) { - if (new_p != Eval::BONA_PIECE_ZERO) { - added->push_back(MakeIndex(sq_target_k, new_p)); - } - } - } -} - -template class 
HalfRelativeKP; -template class HalfRelativeKP; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/eval/nnue/features/half_relative_kp.h b/src/eval/nnue/features/half_relative_kp.h deleted file mode 100644 index 2f967745..00000000 --- a/src/eval/nnue/features/half_relative_kp.h +++ /dev/null @@ -1,68 +0,0 @@ -//Definition of input features HalfRelativeKP of NNUE evaluation function - -#ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_ -#define _NNUE_FEATURES_HALF_RELATIVE_KP_H_ - -#if defined(EVAL_NNUE) - -#include "../../../evaluate.h" -#include "features_common.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Feature HalfRelativeKP: Relative position of each piece other than the ball based on own ball or enemy ball -template -class HalfRelativeKP { - public: - // feature quantity name - static constexpr const char* kName = (AssociatedKing == Side::kFriend) ? - "HalfRelativeKP(Friend)" : "HalfRelativeKP(Enemy)"; - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = - 0xF9180919u ^ (AssociatedKing == Side::kFriend); - // Piece type excluding balls - static constexpr IndexType kNumPieceKinds = (fe_end - fe_hand_end) / SQUARE_NB; - // width of the virtual board with the ball in the center - static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1; - // height of a virtual board with balls in the center - static constexpr IndexType kBoardHeight = RANK_NB * 2 - 1; - // number of feature dimensions - static constexpr IndexType kDimensions = - kNumPieceKinds * kBoardHeight * kBoardWidth; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING; - // Timing of full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = - (AssociatedKing == Side::kFriend) ? 
- TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved; - - // Get a list of indices with a value of 1 among the features - static void AppendActiveIndices(const Position& pos, Color perspective, - IndexList* active); - - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - static void AppendChangedIndices(const Position& pos, Color perspective, - IndexList* removed, IndexList* added); - - // Find the index of the feature quantity from the ball position and BonaPiece - static IndexType MakeIndex(Square sq_k, BonaPiece p); - - private: - // Get the piece information - static void GetPieces(const Position& pos, Color perspective, - BonaPiece** pieces, Square* sq_target_k); -}; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/features/index_list.h b/src/eval/nnue/features/index_list.h deleted file mode 100644 index 39e66a09..00000000 --- a/src/eval/nnue/features/index_list.h +++ /dev/null @@ -1,55 +0,0 @@ -// Definition of index list of input features - -#ifndef _NNUE_FEATURES_INDEX_LIST_H_ -#define _NNUE_FEATURES_INDEX_LIST_H_ - -#if defined(EVAL_NNUE) - -#include "../../../position.h" -#include "../nnue_architecture.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Class template used for feature index list -template -class ValueList { - public: - std::size_t size() const { return size_; } - void resize(std::size_t size) { size_ = size; } - void push_back(const T& value) { values_[size_++] = value; } - T& operator[](std::size_t index) { return values_[index]; } - T* begin() { return values_; } - T* end() { return values_ + size_; } - const T& operator[](std::size_t index) const { return values_[index]; } - const T* begin() const { return values_; } - const T* end() const { return values_ + size_; } - void swap(ValueList& other) { - const std::size_t max_size = std::max(size_, other.size_); - for 
(std::size_t i = 0; i < max_size; ++i) { - std::swap(values_[i], other.values_[i]); - } - std::swap(size_, other.size_); - } - private: - T values_[MaxSize]; - std::size_t size_ = 0; -}; - -//Type of feature index list -class IndexList - : public ValueList { -}; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/features/k.cpp b/src/eval/nnue/features/k.cpp deleted file mode 100644 index dc01eb92..00000000 --- a/src/eval/nnue/features/k.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//Definition of input feature quantity K of NNUE evaluation function - -#if defined(EVAL_NNUE) - -#include "k.h" -#include "index_list.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Get a list of indices with a value of 1 among the features -void K::AppendActiveIndices( - const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - const BonaPiece* pieces = (perspective == BLACK) ? 
- pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - assert(pieces[PIECE_NUMBER_BKING] != BONA_PIECE_ZERO); - assert(pieces[PIECE_NUMBER_WKING] != BONA_PIECE_ZERO); - for (PieceNumber i = PIECE_NUMBER_KING; i < PIECE_NUMBER_NB; ++i) { - active->push_back(pieces[i] - fe_end); - } -} - -// Get a list of indices whose values ​​have changed from the previous one in the feature quantity -void K::AppendChangedIndices( - const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { - const auto& dp = pos.state()->dirtyPiece; - if (dp.pieceNo[0] >= PIECE_NUMBER_KING) { - removed->push_back( - dp.changed_piece[0].old_piece.from[perspective] - fe_end); - added->push_back( - dp.changed_piece[0].new_piece.from[perspective] - fe_end); - } -} - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/eval/nnue/features/k.h b/src/eval/nnue/features/k.h deleted file mode 100644 index d7a6f4aa..00000000 --- a/src/eval/nnue/features/k.h +++ /dev/null @@ -1,48 +0,0 @@ -//Definition of input feature quantity K of NNUE evaluation function - -#ifndef _NNUE_FEATURES_K_H_ -#define _NNUE_FEATURES_K_H_ - -#if defined(EVAL_NNUE) - -#include "../../../evaluate.h" -#include "features_common.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Feature K: Ball position -class K { - public: - // feature quantity name - static constexpr const char* kName = "K"; - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = 0xD3CEE169u; - // number of feature dimensions - static constexpr IndexType kDimensions = SQUARE_NB * 2; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = 2; - // Timing of full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone; - - // Get a 
list of indices with a value of 1 among the features - static void AppendActiveIndices(const Position& pos, Color perspective, - IndexList* active); - - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - static void AppendChangedIndices(const Position& pos, Color perspective, - IndexList* removed, IndexList* added); -}; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/features/p.cpp b/src/eval/nnue/features/p.cpp deleted file mode 100644 index 68527119..00000000 --- a/src/eval/nnue/features/p.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//Definition of input feature P of NNUE evaluation function - -#if defined(EVAL_NNUE) - -#include "p.h" -#include "index_list.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Get a list of indices with a value of 1 among the features -void P::AppendActiveIndices( - const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - const BonaPiece* pieces = (perspective == BLACK) ? 
- pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) { - if (pieces[i] != Eval::BONA_PIECE_ZERO) { - active->push_back(pieces[i]); - } - } -} - -// Get a list of indices whose values ​​have changed from the previous one in the feature quantity -void P::AppendChangedIndices( - const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { - const auto& dp = pos.state()->dirtyPiece; - for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue; - if (dp.changed_piece[i].old_piece.from[perspective] != Eval::BONA_PIECE_ZERO) { - removed->push_back(dp.changed_piece[i].old_piece.from[perspective]); - } - if (dp.changed_piece[i].new_piece.from[perspective] != Eval::BONA_PIECE_ZERO) { - added->push_back(dp.changed_piece[i].new_piece.from[perspective]); - } - } -} - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/eval/nnue/features/p.h b/src/eval/nnue/features/p.h deleted file mode 100644 index 27a944fa..00000000 --- a/src/eval/nnue/features/p.h +++ /dev/null @@ -1,48 +0,0 @@ -//Definition of input feature P of NNUE evaluation function - -#ifndef _NNUE_FEATURES_P_H_ -#define _NNUE_FEATURES_P_H_ - -#if defined(EVAL_NNUE) - -#include "../../../evaluate.h" -#include "features_common.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Feature P: BonaPiece of pieces other than balls -class P { - public: - // feature quantity name - static constexpr const char* kName = "P"; - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = 0x764CFB4Bu; - // number of feature dimensions - static constexpr IndexType kDimensions = fe_end; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING; - // 
Timing of full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone; - - // Get a list of indices with a value of 1 among the features - static void AppendActiveIndices(const Position& pos, Color perspective, - IndexList* active); - - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - static void AppendChangedIndices(const Position& pos, Color perspective, - IndexList* removed, IndexList* added); -}; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/layers/affine_transform.h b/src/eval/nnue/layers/affine_transform.h deleted file mode 100644 index cb56b07d..00000000 --- a/src/eval/nnue/layers/affine_transform.h +++ /dev/null @@ -1,178 +0,0 @@ -// Definition of layer AffineTransform of NNUE evaluation function - -#ifndef _NNUE_LAYERS_AFFINE_TRANSFORM_H_ -#define _NNUE_LAYERS_AFFINE_TRANSFORM_H_ - -#if defined(EVAL_NNUE) - -#include "../nnue_common.h" - -namespace Eval { - -namespace NNUE { - -namespace Layers { - -// affine transformation layer -template -class AffineTransform { - public: - // Input/output type - using InputType = typename PreviousLayer::OutputType; - using OutputType = std::int32_t; - static_assert(std::is_same::value, ""); - - // number of input/output dimensions - static constexpr IndexType kInputDimensions = - PreviousLayer::kOutputDimensions; - static constexpr IndexType kOutputDimensions = OutputDimensions; - static constexpr IndexType kPaddedInputDimensions = - CeilToMultiple(kInputDimensions, kMaxSimdWidth); - - // Size of forward propagation buffer used in this layer - static constexpr std::size_t kSelfBufferSize = - CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); - - // Size of the forward propagation buffer used from the input layer to this layer - static constexpr std::size_t kBufferSize = - 
PreviousLayer::kBufferSize + kSelfBufferSize; - - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t GetHashValue() { - std::uint32_t hash_value = 0xCC03DAE4u; - hash_value += kOutputDimensions; - hash_value ^= PreviousLayer::GetHashValue() >> 1; - hash_value ^= PreviousLayer::GetHashValue() << 31; - return hash_value; - } - - // A string that represents the structure from the input layer to this layer - static std::string GetStructureString() { - return "AffineTransform[" + - std::to_string(kOutputDimensions) + "<-" + - std::to_string(kInputDimensions) + "](" + - PreviousLayer::GetStructureString() + ")"; - } - - // read parameters - bool ReadParameters(std::istream& stream) { - if (!previous_layer_.ReadParameters(stream)) return false; - stream.read(reinterpret_cast(biases_), - kOutputDimensions * sizeof(BiasType)); - stream.read(reinterpret_cast(weights_), - kOutputDimensions * kPaddedInputDimensions * - sizeof(WeightType)); - return !stream.fail(); - } - - // write parameters - bool WriteParameters(std::ostream& stream) const { - if (!previous_layer_.WriteParameters(stream)) return false; - stream.write(reinterpret_cast(biases_), - kOutputDimensions * sizeof(BiasType)); - stream.write(reinterpret_cast(weights_), - kOutputDimensions * kPaddedInputDimensions * - sizeof(WeightType)); - return !stream.fail(); - } - - // forward propagation - const OutputType* Propagate( - const TransformedFeatureType* transformed_features, char* buffer) const { - const auto input = previous_layer_.Propagate( - transformed_features, buffer + kSelfBufferSize); - const auto output = reinterpret_cast(buffer); -#if defined(USE_AVX2) - constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const __m256i kOnes = _mm256_set1_epi16(1); - const auto input_vector = reinterpret_cast(input); -#elif defined(USE_SSSE3) - constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const __m128i kOnes = _mm_set1_epi16(1); - const 
auto input_vector = reinterpret_cast(input); -#elif defined(IS_ARM) - constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const auto input_vector = reinterpret_cast(input); -#endif - for (IndexType i = 0; i < kOutputDimensions; ++i) { - const IndexType offset = i * kPaddedInputDimensions; -#if defined(USE_AVX2) - __m256i sum = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, biases_[i]); - const auto row = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m256i product = _mm256_maddubs_epi16( -#if defined(__MINGW32__) || defined(__MINGW64__) - // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary - // compiled with g++ in MSYS2 crashes here because the output memory is not aligned - // even though alignas is specified. - _mm256_loadu_si256 -#else - _mm256_load_si256 -#endif - (&input_vector[j]), _mm256_load_si256(&row[j])); - product = _mm256_madd_epi16(product, kOnes); - sum = _mm256_add_epi32(sum, product); - } - sum = _mm256_hadd_epi32(sum, sum); - sum = _mm256_hadd_epi32(sum, sum); - const __m128i lo = _mm256_extracti128_si256(sum, 0); - const __m128i hi = _mm256_extracti128_si256(sum, 1); - output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi); -#elif defined(USE_SSSE3) - __m128i sum = _mm_cvtsi32_si128(biases_[i]); - const auto row = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m128i product = _mm_maddubs_epi16( - _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); - product = _mm_madd_epi16(product, kOnes); - sum = _mm_add_epi32(sum, product); - } - sum = _mm_hadd_epi32(sum, sum); - sum = _mm_hadd_epi32(sum, sum); - output[i] = _mm_cvtsi128_si32(sum); -#elif defined(IS_ARM) - int32x4_t sum = {biases_[i]}; - const auto row = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]); - product = vmlal_s8(product, input_vector[j * 2 + 1], row[j 
* 2 + 1]); - sum = vpadalq_s16(sum, product); - } - output[i] = sum[0] + sum[1] + sum[2] + sum[3]; -#else - OutputType sum = biases_[i]; - for (IndexType j = 0; j < kInputDimensions; ++j) { - sum += weights_[offset + j] * input[j]; - } - output[i] = sum; -#endif - } - return output; - } - - private: - // parameter type - using BiasType = OutputType; - using WeightType = std::int8_t; - - // Make the learning class a friend - friend class Trainer; - - // the layer immediately before this layer - PreviousLayer previous_layer_; - - // parameter - alignas(kCacheLineSize) BiasType biases_[kOutputDimensions]; - alignas(kCacheLineSize) - WeightType weights_[kOutputDimensions * kPaddedInputDimensions]; -}; - -} // namespace Layers - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/layers/clipped_relu.h b/src/eval/nnue/layers/clipped_relu.h deleted file mode 100644 index fe4bedaa..00000000 --- a/src/eval/nnue/layers/clipped_relu.h +++ /dev/null @@ -1,177 +0,0 @@ -// Definition of layer ClippedReLU of NNUE evaluation function - -#ifndef _NNUE_LAYERS_CLIPPED_RELU_H_ -#define _NNUE_LAYERS_CLIPPED_RELU_H_ - -#if defined(EVAL_NNUE) - -#include "../nnue_common.h" - -namespace Eval { - -namespace NNUE { - -namespace Layers { - -// Clipped ReLU -template -class ClippedReLU { - public: - // Input/output type - using InputType = typename PreviousLayer::OutputType; - using OutputType = std::uint8_t; - static_assert(std::is_same::value, ""); - - // number of input/output dimensions - static constexpr IndexType kInputDimensions = - PreviousLayer::kOutputDimensions; - static constexpr IndexType kOutputDimensions = kInputDimensions; - - // Size of forward propagation buffer used in this layer - static constexpr std::size_t kSelfBufferSize = - CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); - - // Size of the forward propagation buffer used from the input layer to this layer - static constexpr 
std::size_t kBufferSize = - PreviousLayer::kBufferSize + kSelfBufferSize; - - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t GetHashValue() { - std::uint32_t hash_value = 0x538D24C7u; - hash_value += PreviousLayer::GetHashValue(); - return hash_value; - } - - // A string that represents the structure from the input layer to this layer - static std::string GetStructureString() { - return "ClippedReLU[" + - std::to_string(kOutputDimensions) + "](" + - PreviousLayer::GetStructureString() + ")"; - } - - // read parameters - bool ReadParameters(std::istream& stream) { - return previous_layer_.ReadParameters(stream); - } - - // write parameters - bool WriteParameters(std::ostream& stream) const { - return previous_layer_.WriteParameters(stream); - } - - // forward propagation - const OutputType* Propagate( - const TransformedFeatureType* transformed_features, char* buffer) const { - const auto input = previous_layer_.Propagate( - transformed_features, buffer + kSelfBufferSize); - const auto output = reinterpret_cast(buffer); -#if defined(USE_AVX2) - constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; - const __m256i kZero = _mm256_setzero_si256(); - const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m256i*>(output); - for (IndexType i = 0; i < kNumChunks; ++i) { - const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( -#if defined(__MINGW32__) || defined(__MINGW64__) - // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary - // compiled with g++ in MSYS2 crashes here because the output memory is not aligned - // even though alignas is specified. 
- _mm256_loadu_si256 -#else - _mm256_load_si256 -#endif - (&in[i * 4 + 0]), -#if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 -#else - _mm256_load_si256 -#endif - (&in[i * 4 + 1])), kWeightScaleBits); - const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( -#if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 -#else - _mm256_load_si256 -#endif - (&in[i * 4 + 2]), -#if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 -#else - _mm256_load_si256 -#endif - (&in[i * 4 + 3])), kWeightScaleBits); -#if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256 -#else - _mm256_store_si256 -#endif - (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( - _mm256_packs_epi16(words0, words1), kZero), kOffsets)); - } - constexpr IndexType kStart = kNumChunks * kSimdWidth; -#elif defined(USE_SSSE3) - constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; - const __m128i kZero = _mm_setzero_si128(); -#ifndef USE_SSE41 - const __m128i k0x80s = _mm_set1_epi8(-128); -#endif - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m128i*>(output); - for (IndexType i = 0; i < kNumChunks; ++i) { - const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 0]), - _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits); - const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 2]), - _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits); - const __m128i packedbytes = _mm_packs_epi16(words0, words1); - _mm_store_si128(&out[i], -#ifdef USE_SSE41 - _mm_max_epi8(packedbytes, kZero) -#else - _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) -#endif - ); - } - constexpr IndexType kStart = kNumChunks * kSimdWidth; -#elif defined(IS_ARM) - constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2); - const int8x8_t kZero = {0}; - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast(output); - for (IndexType i 
= 0; i < kNumChunks; ++i) { - int16x8_t shifted; - const auto pack = reinterpret_cast(&shifted); - pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits); - pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits); - out[i] = vmax_s8(vqmovn_s16(shifted), kZero); - } - constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2); -#else - constexpr IndexType kStart = 0; -#endif - for (IndexType i = kStart; i < kInputDimensions; ++i) { - output[i] = static_cast( - std::max(0, std::min(127, input[i] >> kWeightScaleBits))); - } - return output; - } - - private: - // Make the learning class a friend - friend class Trainer; - - // the layer immediately before this layer - PreviousLayer previous_layer_; -}; - -} // namespace Layers - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/layers/sum.h b/src/eval/nnue/layers/sum.h deleted file mode 100644 index d8c7bf93..00000000 --- a/src/eval/nnue/layers/sum.h +++ /dev/null @@ -1,163 +0,0 @@ -// Definition of layer Sum of NNUE evaluation function - -#ifndef _NNUE_LAYERS_SUM_H_ -#define _NNUE_LAYERS_SUM_H_ - -#if defined(EVAL_NNUE) - -#include "../nnue_common.h" - -namespace Eval { - -namespace NNUE { - -namespace Layers { - -// Layer that sums the output of multiple layers -template -class Sum : public Sum { - private: - using Head = FirstPreviousLayer; - using Tail = Sum; - - public: - // Input/output type - using InputType = typename Head::OutputType; - using OutputType = InputType; - static_assert(std::is_same::value, ""); - - // number of input/output dimensions - static constexpr IndexType kInputDimensions = Head::kOutputDimensions; - static constexpr IndexType kOutputDimensions = kInputDimensions; - static_assert(kInputDimensions == Tail::kInputDimensions ,""); - - // Size of forward propagation buffer used in this layer - static constexpr std::size_t kSelfBufferSize = - CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); - - // Size of the 
forward propagation buffer used from the input layer to this layer - static constexpr std::size_t kBufferSize = - std::max(Head::kBufferSize + kSelfBufferSize, Tail::kBufferSize); - - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t GetHashValue() { - std::uint32_t hash_value = 0xBCE400B4u; - hash_value ^= Head::GetHashValue() >> 1; - hash_value ^= Head::GetHashValue() << 31; - hash_value ^= Tail::GetHashValue() >> 2; - hash_value ^= Tail::GetHashValue() << 30; - return hash_value; - } - - // A string that represents the structure from the input layer to this layer - static std::string GetStructureString() { - return "Sum[" + - std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")"; - } - - // read parameters - bool ReadParameters(std::istream& stream) { - if (!Tail::ReadParameters(stream)) return false; - return previous_layer_.ReadParameters(stream); - } - - // write parameters - bool WriteParameters(std::ostream& stream) const { - if (!Tail::WriteParameters(stream)) return false; - return previous_layer_.WriteParameters(stream); - } - - // forward propagation - const OutputType* Propagate( - const TransformedFeatureType* transformed_features, char* buffer) const { - Tail::Propagate(transformed_features, buffer); - const auto head_output = previous_layer_.Propagate( - transformed_features, buffer + kSelfBufferSize); - const auto output = reinterpret_cast(buffer); - for (IndexType i = 0; i ; - - // the layer immediately before this layer - FirstPreviousLayer previous_layer_; -}; - -// Layer that sums the output of multiple layers (when there is one template argument) -template -class Sum { - public: - // Input/output type - using InputType = typename PreviousLayer::OutputType; - using OutputType = InputType; - - // number of input/output dimensions - static constexpr IndexType kInputDimensions = - PreviousLayer::kOutputDimensions; - static constexpr IndexType kOutputDimensions = kInputDimensions; - - // Size of 
the forward propagation buffer used from the input layer to this layer - static constexpr std::size_t kBufferSize = PreviousLayer::kBufferSize; - - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t GetHashValue() { - std::uint32_t hash_value = 0xBCE400B4u; - hash_value ^= PreviousLayer::GetHashValue() >> 1; - hash_value ^= PreviousLayer::GetHashValue() << 31; - return hash_value; - } - - // A string that represents the structure from the input layer to this layer - static std::string GetStructureString() { - return "Sum[" + - std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")"; - } - - // read parameters - bool ReadParameters(std::istream& stream) { - return previous_layer_.ReadParameters(stream); - } - - // write parameters - bool WriteParameters(std::ostream& stream) const { - return previous_layer_.WriteParameters(stream); - } - - // forward propagation - const OutputType* Propagate( - const TransformedFeatureType* transformed_features, char* buffer) const { - return previous_layer_.Propagate(transformed_features, buffer); - } - - protected: - // A string that represents the list of layers to be summed - static std::string GetSummandsString() { - return PreviousLayer::GetStructureString(); - } - - // Make the learning class a friend - friend class Trainer; - - // the layer immediately before this layer - PreviousLayer previous_layer_; -}; - -} // namespace Layers - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/nnue_accumulator.h b/src/eval/nnue/nnue_accumulator.h deleted file mode 100644 index 07f4f183..00000000 --- a/src/eval/nnue/nnue_accumulator.h +++ /dev/null @@ -1,30 +0,0 @@ -// Class for difference calculation of NNUE evaluation function - -#ifndef _NNUE_ACCUMULATOR_H_ -#define _NNUE_ACCUMULATOR_H_ - -#if defined(EVAL_NNUE) - -#include "nnue_architecture.h" - -namespace Eval { - -namespace NNUE { - -// Class that holds the result of 
affine transformation of input features -// Keep the evaluation value that is the final output together -struct alignas(32) Accumulator { - std::int16_t - accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - Value score = VALUE_ZERO; - bool computed_accumulation = false; - bool computed_score = false; -}; - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/nnue_architecture.h b/src/eval/nnue/nnue_architecture.h deleted file mode 100644 index cb53e4f9..00000000 --- a/src/eval/nnue/nnue_architecture.h +++ /dev/null @@ -1,33 +0,0 @@ -// Input features and network structure used in NNUE evaluation function - -#ifndef _NNUE_ARCHITECTURE_H_ -#define _NNUE_ARCHITECTURE_H_ - -#if defined(EVAL_NNUE) - -// include a header that defines the input features and network structure -//#include "architectures/k-p_256x2-32-32.h" -//#include "architectures/k-p-cr_256x2-32-32.h" -//#include "architectures/k-p-cr-ep_256x2-32-32.h" -#include "architectures/halfkp_256x2-32-32.h" -//#include "architectures/halfkp-cr-ep_256x2-32-32.h" -//#include "architectures/halfkp_384x2-32-32.h" - -namespace Eval { - -namespace NNUE { - -static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, ""); -static_assert(Network::kOutputDimensions == 1, ""); -static_assert(std::is_same::value, ""); - -// List of timings to perform all calculations instead of difference calculation -constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers; - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/nnue_common.h b/src/eval/nnue/nnue_common.h deleted file mode 100644 index cffb0098..00000000 --- a/src/eval/nnue/nnue_common.h +++ /dev/null @@ -1,64 +0,0 @@ -// Constants used in NNUE evaluation function - -#ifndef _NNUE_COMMON_H_ -#define _NNUE_COMMON_H_ - -#if defined(EVAL_NNUE) - -#if defined(USE_AVX2) -#include -#elif defined(USE_SSE41) -#include -#elif 
defined(USE_SSSE3) -#include -#elif defined(USE_SSE2) -#include -#endif - -namespace Eval { - -namespace NNUE { - -// A constant that represents the version of the evaluation function file -constexpr std::uint32_t kVersion = 0x7AF32F16u; - -// Constant used in evaluation value calculation -constexpr int FV_SCALE = 16; -constexpr int kWeightScaleBits = 6; - -// Size of cache line (in bytes) -constexpr std::size_t kCacheLineSize = 64; - -// SIMD width (in bytes) -#if defined(USE_AVX2) -constexpr std::size_t kSimdWidth = 32; -#elif defined(USE_SSE2) -constexpr std::size_t kSimdWidth = 16; -#elif defined(IS_ARM) -constexpr std::size_t kSimdWidth = 16; -#endif -constexpr std::size_t kMaxSimdWidth = 32; - -// Type of input feature after conversion -using TransformedFeatureType = std::uint8_t; - -// index type -using IndexType = std::uint32_t; - -// Forward declaration of learning class template -template -class Trainer; - -// find the smallest multiple of n and above -template -constexpr IntType CeilToMultiple(IntType n, IntType base) { - return (n + base - 1) / base * base; -} - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/nnue_feature_transformer.h b/src/eval/nnue/nnue_feature_transformer.h deleted file mode 100644 index bb1a50bc..00000000 --- a/src/eval/nnue/nnue_feature_transformer.h +++ /dev/null @@ -1,357 +0,0 @@ -// A class that converts the input features of the NNUE evaluation function - -#ifndef _NNUE_FEATURE_TRANSFORMER_H_ -#define _NNUE_FEATURE_TRANSFORMER_H_ - -#if defined(EVAL_NNUE) - -#include "nnue_common.h" -#include "nnue_architecture.h" -#include "features/index_list.h" - -#include // std::memset() - -namespace Eval { - -namespace NNUE { - -// Input feature converter -class FeatureTransformer { - private: - // number of output dimensions for one side - static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; - - public: - // output type - using OutputType = 
TransformedFeatureType; - - // number of input/output dimensions - static constexpr IndexType kInputDimensions = RawFeatures::kDimensions; - static constexpr IndexType kOutputDimensions = kHalfDimensions * 2; - - // size of forward propagation buffer - static constexpr std::size_t kBufferSize = - kOutputDimensions * sizeof(OutputType); - - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t GetHashValue() { - return RawFeatures::kHashValue ^ kOutputDimensions; - } - - // a string representing the structure - static std::string GetStructureString() { - return RawFeatures::GetName() + "[" + - std::to_string(kInputDimensions) + "->" + - std::to_string(kHalfDimensions) + "x2]"; - } - - // read parameters - bool ReadParameters(std::istream& stream) { - stream.read(reinterpret_cast(biases_), - kHalfDimensions * sizeof(BiasType)); - stream.read(reinterpret_cast(weights_), - kHalfDimensions * kInputDimensions * sizeof(WeightType)); - return !stream.fail(); - } - - // write parameters - bool WriteParameters(std::ostream& stream) const { - stream.write(reinterpret_cast(biases_), - kHalfDimensions * sizeof(BiasType)); - stream.write(reinterpret_cast(weights_), - kHalfDimensions * kInputDimensions * sizeof(WeightType)); - return !stream.fail(); - } - - // proceed with the difference calculation if possible - bool UpdateAccumulatorIfPossible(const Position& pos) const { - const auto now = pos.state(); - if (now->accumulator.computed_accumulation) { - return true; - } - const auto prev = now->previous; - if (prev && prev->accumulator.computed_accumulation) { - UpdateAccumulator(pos); - return true; - } - return false; - } - - // convert input features - void Transform(const Position& pos, OutputType* output, bool refresh) const { - if (refresh || !UpdateAccumulatorIfPossible(pos)) { - RefreshAccumulator(pos); - } - const auto& accumulation = pos.state()->accumulator.accumulation; -#if defined(USE_AVX2) - constexpr IndexType kNumChunks = 
kHalfDimensions / kSimdWidth; - constexpr int kControl = 0b11011000; - const __m256i kZero = _mm256_setzero_si256(); -#elif defined(USE_SSSE3) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - const __m128i kZero = _mm_setzero_si128(); -#ifndef USE_SSE41 - const __m128i k0x80s = _mm_set1_epi8(-128); -#endif -#elif defined(IS_ARM) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - const int8x8_t kZero = {0}; -#endif - const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; - for (IndexType p = 0; p < 2; ++p) { - const IndexType offset = kHalfDimensions * p; -#if defined(USE_AVX2) - auto out = reinterpret_cast<__m256i*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m256i sum0 = -#if defined(__MINGW32__) || defined(__MINGW64__) - // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary - // compiled with g++ in MSYS2 crashes here because the output memory is not aligned - // even though alignas is specified. 
- _mm256_loadu_si256 -#else - _mm256_load_si256 -#endif - (&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m256i sum1 = -#if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 -#else - _mm256_load_si256 -#endif - (&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm256_add_epi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm256_add_epi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } -#if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256 -#else - _mm256_store_si256 -#endif - (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( - _mm256_packs_epi16(sum0, sum1), kZero), kControl)); - } -#elif defined(USE_SSSE3) - auto out = reinterpret_cast<__m128i*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m128i sum0 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m128i sum1 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm_add_epi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm_add_epi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } - const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); - - _mm_store_si128(&out[j], -#ifdef USE_SSE41 - _mm_max_epi8(packedbytes, kZero) -#else - _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) -#endif - ); - } -#elif defined(IS_ARM) - const auto out = reinterpret_cast(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - int16x8_t sum = reinterpret_cast( - accumulation[perspectives[p]][0])[j]; - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum = vaddq_s16(sum, reinterpret_cast( - accumulation[perspectives[p]][i])[j]); - } - out[j] = vmax_s8(vqmovn_s16(sum), 
kZero); - } -#else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum += accumulation[static_cast(perspectives[p])][i][j]; - } - output[offset + j] = static_cast( - std::max(0, std::min(127, sum))); - } -#endif - } - } - - private: - // Calculate cumulative value without using difference calculation - void RefreshAccumulator(const Position& pos) const { - auto& accumulator = pos.state()->accumulator; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList active_indices[2]; - RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], - active_indices); - for (const auto perspective : Colors) { - if (i == 0) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memset(accumulator.accumulation[perspective][i], 0, - kHalfDimensions * sizeof(BiasType)); - } - for (const auto index : active_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; -#if defined(USE_AVX2) - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { -#if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j])); -#else - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); -#endif - } -#elif defined(USE_SSE2) - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - 
} -#elif defined(IS_ARM) - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } -#else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - } -#endif - } - } - } - - accumulator.computed_accumulation = true; - accumulator.computed_score = false; - } - - // Calculate cumulative value using difference calculation - void UpdateAccumulator(const Position& pos) const { - const auto prev_accumulator = pos.state()->previous->accumulator; - auto& accumulator = pos.state()->accumulator; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList removed_indices[2], added_indices[2]; - bool reset[2]; - RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], - removed_indices, added_indices, reset); - for (const auto perspective : Colors) { -#if defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); -#elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); -#elif defined(IS_ARM) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); -#endif - if (reset[perspective]) { - if (i == 0) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memset(accumulator.accumulation[perspective][i], 0, - kHalfDimensions * sizeof(BiasType)); - } - } else {// Difference calculation for the feature 
amount changed from 1 to 0 - std::memcpy(accumulator.accumulation[perspective][i], - prev_accumulator.accumulation[perspective][i], - kHalfDimensions * sizeof(BiasType)); - for (const auto index : removed_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; -#if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); - } -#elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); - } -#elif defined(IS_ARM) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vsubq_s16(accumulation[j], column[j]); - } -#else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] -= - weights_[offset + j]; - } -#endif - } - } - {// Difference calculation for features that changed from 0 to 1 - for (const auto index : added_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; -#if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - } -#elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - } -#elif defined(IS_ARM) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } -#else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] += - weights_[offset + j]; - } -#endif - } - } - } - } - - accumulator.computed_accumulation = true; - accumulator.computed_score = false; - } - 
- // parameter type - using BiasType = std::int16_t; - using WeightType = std::int16_t; - - // Make the learning class a friend - friend class Trainer; - - // parameter - alignas(kCacheLineSize) BiasType biases_[kHalfDimensions]; - alignas(kCacheLineSize) - WeightType weights_[kHalfDimensions * kInputDimensions]; -}; - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/nnue_test_command.cpp b/src/eval/nnue/nnue_test_command.cpp deleted file mode 100644 index b0c57d4c..00000000 --- a/src/eval/nnue/nnue_test_command.cpp +++ /dev/null @@ -1,201 +0,0 @@ -// USI extended command for NNUE evaluation function - -#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) - -#include "../../thread.h" -#include "../../uci.h" -#include "evaluate_nnue.h" -#include "nnue_test_command.h" - -#include -#include - -#define ASSERT(X) { if (!(X)) { std::cout << "\nError : ASSERT(" << #X << "), " << __FILE__ << "(" << __LINE__ << "): " << __func__ << std::endl; \ - std::this_thread::sleep_for(std::chrono::microseconds(3000)); *(int*)1 =0;} } - -namespace Eval { - -namespace NNUE { - -namespace { - -// Testing RawFeatures mainly for difference calculation -void TestFeatures(Position& pos) { - const std::uint64_t num_games = 1000; - StateInfo si; - pos.set(StartFEN, false, &si, Threads.main()); - const int MAX_PLY = 256; // test up to 256 hands - - StateInfo state[MAX_PLY]; // StateInfo only for the maximum number of steps - int ply; // Trouble from the initial phase - - PRNG prng(20171128); - - std::uint64_t num_moves = 0; - std::vector num_updates(kRefreshTriggers.size() + 1); - std::vector num_resets(kRefreshTriggers.size()); - constexpr IndexType kUnknown = -1; - std::vector trigger_map(RawFeatures::kDimensions, kUnknown); - auto make_index_sets = [&](const Position& pos) { - std::vector>> index_sets( - kRefreshTriggers.size(), std::vector>(2)); - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList 
active_indices[2]; - RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], - active_indices); - for (const auto perspective : Colors) { - for (const auto index : active_indices[perspective]) { - ASSERT(index < RawFeatures::kDimensions); - ASSERT(index_sets[i][perspective].count(index) == 0); - ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i); - index_sets[i][perspective].insert(index); - trigger_map[index] = i; - } - } - } - return index_sets; - }; - auto update_index_sets = [&](const Position& pos, auto* index_sets) { - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList removed_indices[2], added_indices[2]; - bool reset[2]; - RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], - removed_indices, added_indices, reset); - for (const auto perspective : Colors) { - if (reset[perspective]) { - (*index_sets)[i][perspective].clear(); - ++num_resets[i]; - } else { - for (const auto index : removed_indices[perspective]) { - ASSERT(index < RawFeatures::kDimensions); - ASSERT((*index_sets)[i][perspective].count(index) == 1); - ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i); - (*index_sets)[i][perspective].erase(index); - ++num_updates.back(); - ++num_updates[i]; - trigger_map[index] = i; - } - } - for (const auto index : added_indices[perspective]) { - ASSERT(index < RawFeatures::kDimensions); - ASSERT((*index_sets)[i][perspective].count(index) == 0); - ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i); - (*index_sets)[i][perspective].insert(index); - ++num_updates.back(); - ++num_updates[i]; - trigger_map[index] = i; - } - } - } - }; - - std::cout << "feature set: " << RawFeatures::GetName() - << "[" << RawFeatures::kDimensions << "]" << std::endl; - std::cout << "start testing with random games"; - - for (std::uint64_t i = 0; i < num_games; ++i) { - auto index_sets = make_index_sets(pos); - for (ply = 0; ply < MAX_PLY; ++ply) { - MoveList mg(pos); // Generate all legal hands - - 
// There was no legal move == Clog - if (mg.size() == 0) - break; - - // Randomly choose from the generated moves and advance the phase with the moves. - Move m = mg.begin()[prng.rand(mg.size())]; - pos.do_move(m, state[ply]); - - ++num_moves; - update_index_sets(pos, &index_sets); - ASSERT(index_sets == make_index_sets(pos)); - } - - pos.set(StartFEN, false, &si, Threads.main()); - - // Output'.' every 100 times (so you can see that it's progressing) - if ((i % 100) == 0) - std::cout << "." << std::flush; - } - std::cout << "passed." << std::endl; - std::cout << num_games << " games, " << num_moves << " moves, " - << num_updates.back() << " updates, " - << (1.0 * num_updates.back() / num_moves) - << " updates per move" << std::endl; - std::size_t num_observed_indices = 0; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - const auto count = std::count(trigger_map.begin(), trigger_map.end(), i); - num_observed_indices += count; - std::cout << "TriggerEvent(" << static_cast(kRefreshTriggers[i]) - << "): " << count << " features (" - << (100.0 * count / RawFeatures::kDimensions) << "%), " - << num_updates[i] << " updates (" - << (1.0 * num_updates[i] / num_moves) << " per move), " - << num_resets[i] << " resets (" - << (100.0 * num_resets[i] / num_moves) << "%)" - << std::endl; - } - std::cout << "observed " << num_observed_indices << " (" - << (100.0 * num_observed_indices / RawFeatures::kDimensions) - << "% of " << RawFeatures::kDimensions - << ") features" << std::endl; -} - -// Output a string that represents the structure of the evaluation function -void PrintInfo(std::istream& stream) { - std::cout << "network architecture: " << GetArchitectureString() << std::endl; - - while (true) { - std::string file_name; - stream >> file_name; - if (file_name.empty()) break; - - std::uint32_t hash_value; - std::string architecture; - const bool success = [&]() { - std::ifstream file_stream(file_name, std::ios::binary); - if (!file_stream) return false; - if 
(!ReadHeader(file_stream, &hash_value, &architecture)) return false; - return true; - }(); - - std::cout << file_name << ": "; - if (success) { - if (hash_value == kHashValue) { - std::cout << "matches with this binary"; - if (architecture != GetArchitectureString()) { - std::cout << ", but architecture string differs: " << architecture; - } - std::cout << std::endl; - } else { - std::cout << architecture << std::endl; - } - } else { - std::cout << "failed to read header" << std::endl; - } - } -} - -} // namespace - -// USI extended command for NNUE evaluation function -void TestCommand(Position& pos, std::istream& stream) { - std::string sub_command; - stream >> sub_command; - - if (sub_command == "test_features") { - TestFeatures(pos); - } else if (sub_command == "info") { - PrintInfo(stream); - } else { - std::cout << "usage:" << std::endl; - std::cout << " test nnue test_features" << std::endl; - std::cout << " test nnue info [path/to/" << fileName << "...]" << std::endl; - } -} - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) diff --git a/src/eval/nnue/nnue_test_command.h b/src/eval/nnue/nnue_test_command.h deleted file mode 100644 index 570ef01b..00000000 --- a/src/eval/nnue/nnue_test_command.h +++ /dev/null @@ -1,21 +0,0 @@ -// USI extended command interface for NNUE evaluation function - -#ifndef _NNUE_TEST_COMMAND_H_ -#define _NNUE_TEST_COMMAND_H_ - -#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) - -namespace Eval { - -namespace NNUE { - -// USI extended command for NNUE evaluation function -void TestCommand(Position& pos, std::istream& stream); - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/trainer/features/factorizer.h b/src/eval/nnue/trainer/features/factorizer.h deleted file mode 100644 index 148ee8ec..00000000 --- a/src/eval/nnue/trainer/features/factorizer.h +++ /dev/null @@ -1,110 +0,0 @@ -// 
NNUE evaluation function feature conversion class template - -#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_ -#define _NNUE_TRAINER_FEATURES_FACTORIZER_H_ - -#if defined(EVAL_NNUE) - -#include "../../nnue_common.h" -#include "../trainer.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Class template that converts input features into learning features -// By default, the learning feature is the same as the original input feature, and specialized as necessary -template -class Factorizer { - public: - // Get the dimensionality of the learning feature - static constexpr IndexType GetDimensions() { - return FeatureType::kDimensions; - } - - // Get index of learning feature and scale of learning rate - static void AppendTrainingFeatures( - IndexType base_index, std::vector* training_features) { - assert(base_index emplace_back(base_index); - } -}; - -// Learning feature information -struct FeatureProperties { - bool active; - IndexType dimensions; -}; - -// Add the original input features to the learning features -template -IndexType AppendBaseFeature( - FeatureProperties properties, IndexType base_index, - std::vector* training_features) { - assert(properties.dimensions == FeatureType::kDimensions); - assert(base_index < FeatureType::kDimensions); - training_features->emplace_back(base_index); - return properties.dimensions; -} - -// If the learning rate scale is not 0, inherit other types of learning features -template -IndexType InheritFeaturesIfRequired( - IndexType index_offset, FeatureProperties properties, IndexType base_index, - std::vector* training_features) { - if (!properties.active) { - return 0; - } - assert(properties.dimensions == Factorizer::GetDimensions()); - assert(base_index < FeatureType::kDimensions); - const auto start = training_features->size(); - Factorizer::AppendTrainingFeatures( - base_index, training_features); - for (auto i = start; i < training_features->size(); ++i) { - auto& feature = (*training_features)[i]; - 
assert(feature.GetIndex() < Factorizer::GetDimensions()); - feature.ShiftIndex(index_offset); - } - return properties.dimensions; -} - -// Return the index difference as needed, without adding learning features -// Call instead of InheritFeaturesIfRequired() if there are no corresponding features -IndexType SkipFeatures(FeatureProperties properties) { - if (!properties.active) { - return 0; - } - return properties.dimensions; -} - -// Get the dimensionality of the learning feature -template -constexpr IndexType GetActiveDimensions( - const FeatureProperties (&properties)[N]) { - static_assert(N > 0, ""); - IndexType dimensions = properties[0].dimensions; - for (std::size_t i = 1; i < N; ++i) { - if (properties[i].active) { - dimensions += properties[i].dimensions; - } - } - return dimensions; -} - -// get the number of elements in the array -template -constexpr std::size_t GetArrayLength(const T (&/*array*/)[N]) { - return N; -} - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/trainer/features/factorizer_feature_set.h b/src/eval/nnue/trainer/features/factorizer_feature_set.h deleted file mode 100644 index af524719..00000000 --- a/src/eval/nnue/trainer/features/factorizer_feature_set.h +++ /dev/null @@ -1,104 +0,0 @@ -// Specialization for feature set of feature conversion class template of NNUE evaluation function - -#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_ -#define _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_ - -#if defined(EVAL_NNUE) - -#include "../../features/feature_set.h" -#include "factorizer.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Class template that converts input features into learning features -// Specialization for FeatureSet -template -class Factorizer> { - private: - using Head = Factorizer>; - using Tail = Factorizer>; - - public: - // number of dimensions of original input features - static constexpr 
IndexType kBaseDimensions = - FeatureSet::kDimensions; - - // Get the dimensionality of the learning feature - static constexpr IndexType GetDimensions() { - return Head::GetDimensions() + Tail::GetDimensions(); - } - - // Get index of learning feature and scale of learning rate - static void AppendTrainingFeatures( - IndexType base_index, std::vector* training_features, - IndexType base_dimensions = kBaseDimensions) { - assert(base_index < kBaseDimensions); - constexpr auto boundary = FeatureSet::kDimensions; - if (base_index < boundary) { - Tail::AppendTrainingFeatures( - base_index, training_features, base_dimensions); - } else { - const auto start = training_features->size(); - Head::AppendTrainingFeatures( - base_index - boundary, training_features, base_dimensions); - for (auto i = start; i < training_features->size(); ++i) { - auto& feature = (*training_features)[i]; - const auto index = feature.GetIndex(); - assert(index < Head::GetDimensions() || - (index >= base_dimensions && - index < base_dimensions + - Head::GetDimensions() - Head::kBaseDimensions)); - if (index < Head::kBaseDimensions) { - feature.ShiftIndex(Tail::kBaseDimensions); - } else { - feature.ShiftIndex(Tail::GetDimensions() - Tail::kBaseDimensions); - } - } - } - } -}; - -// Class template that converts input features into learning features -// Specialization when FeatureSet has one template argument -template -class Factorizer> { -public: - // number of dimensions of original input features - static constexpr IndexType kBaseDimensions = FeatureType::kDimensions; - - // Get the dimensionality of the learning feature - static constexpr IndexType GetDimensions() { - return Factorizer::GetDimensions(); - } - - // Get index of learning feature and scale of learning rate - static void AppendTrainingFeatures( - IndexType base_index, std::vector* training_features, - IndexType base_dimensions = kBaseDimensions) { - assert(base_index < kBaseDimensions); - const auto start = 
training_features->size(); - Factorizer::AppendTrainingFeatures( - base_index, training_features); - for (auto i = start; i < training_features->size(); ++i) { - auto& feature = (*training_features)[i]; - assert(feature.GetIndex() < Factorizer::GetDimensions()); - if (feature.GetIndex() >= kBaseDimensions) { - feature.ShiftIndex(base_dimensions - kBaseDimensions); - } - } - } -}; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/trainer/features/factorizer_half_kp.h b/src/eval/nnue/trainer/features/factorizer_half_kp.h deleted file mode 100644 index a5363771..00000000 --- a/src/eval/nnue/trainer/features/factorizer_half_kp.h +++ /dev/null @@ -1,103 +0,0 @@ -// Specialization of NNUE evaluation function feature conversion class template for HalfKP - -#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_ -#define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_ - -#if defined(EVAL_NNUE) - -#include "../../features/half_kp.h" -#include "../../features/p.h" -#include "../../features/half_relative_kp.h" -#include "factorizer.h" - -namespace Eval { - -namespace NNUE { - -namespace Features { - -// Class template that converts input features into learning features -// Specialization for HalfKP -template -class Factorizer> { - private: - using FeatureType = HalfKP; - - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = - FeatureType::kMaxActiveDimensions; - - // Type of learning feature - enum TrainingFeatureType { - kFeaturesHalfKP, - kFeaturesHalfK, - kFeaturesP, - kFeaturesHalfRelativeKP, - kNumTrainingFeatureTypes, - }; - - // Learning feature information - static constexpr FeatureProperties kProperties[] = { - // kFeaturesHalfKP - {true, FeatureType::kDimensions}, - // kFeaturesHalfK - {true, SQUARE_NB}, - // kFeaturesP - {true, Factorizer

::GetDimensions()}, - // kFeaturesHalfRelativeKP - {true, Factorizer>::GetDimensions()}, - }; - static_assert(GetArrayLength(kProperties) == kNumTrainingFeatureTypes, ""); - - public: - // Get the dimensionality of the learning feature - static constexpr IndexType GetDimensions() { - return GetActiveDimensions(kProperties); - } - - // Get index of learning feature and scale of learning rate - static void AppendTrainingFeatures( - IndexType base_index, std::vector* training_features) { - // kFeaturesHalfKP - IndexType index_offset = AppendBaseFeature( - kProperties[kFeaturesHalfKP], base_index, training_features); - - const auto sq_k = static_cast(base_index / fe_end); - const auto p = static_cast(base_index % fe_end); - // kFeaturesHalfK - { - const auto& properties = kProperties[kFeaturesHalfK]; - if (properties.active) { - training_features->emplace_back(index_offset + sq_k); - index_offset += properties.dimensions; - } - } - // kFeaturesP - index_offset += InheritFeaturesIfRequired

( - index_offset, kProperties[kFeaturesP], p, training_features); - // kFeaturesHalfRelativeKP - if (p >= fe_hand_end) { - index_offset += InheritFeaturesIfRequired>( - index_offset, kProperties[kFeaturesHalfRelativeKP], - HalfRelativeKP::MakeIndex(sq_k, p), - training_features); - } else { - index_offset += SkipFeatures(kProperties[kFeaturesHalfRelativeKP]); - } - - assert(index_offset == GetDimensions()); - } -}; - -template -constexpr FeatureProperties Factorizer>::kProperties[]; - -} // namespace Features - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/trainer/trainer.h b/src/eval/nnue/trainer/trainer.h deleted file mode 100644 index 49400bbe..00000000 --- a/src/eval/nnue/trainer/trainer.h +++ /dev/null @@ -1,125 +0,0 @@ -// Common header of class template for learning NNUE evaluation function - -#ifndef _NNUE_TRAINER_H_ -#define _NNUE_TRAINER_H_ - -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#include "../nnue_common.h" -#include "../features/index_list.h" - -#include -#if defined(USE_BLAS) -static_assert(std::is_same::value, ""); -#include -#endif - -namespace Eval { - -namespace NNUE { - -// Ponanza constant used in the relation between evaluation value and winning percentage -constexpr double kPonanzaConstant = 600.0; - -// Class that represents one index of learning feature -class TrainingFeature { - using StorageType = std::uint32_t; - static_assert(std::is_unsigned::value, ""); - - public: - static constexpr std::uint32_t kIndexBits = 24; - static_assert(kIndexBits < std::numeric_limits::digits, ""); - static constexpr std::uint32_t kCountBits = - std::numeric_limits::digits - kIndexBits; - - explicit TrainingFeature(IndexType index) : - index_and_count_((index << kCountBits) | 1) { - assert(index < (1 << kIndexBits)); - } - TrainingFeature& operator+=(const TrainingFeature& other) { - assert(other.GetIndex() == GetIndex()); - assert(other.GetCount() + GetCount() < (1 << kCountBits)); 
- index_and_count_ += other.GetCount(); - return *this; - } - IndexType GetIndex() const { - return static_cast(index_and_count_ >> kCountBits); - } - void ShiftIndex(IndexType offset) { - assert(GetIndex() + offset < (1 << kIndexBits)); - index_and_count_ += offset << kCountBits; - } - IndexType GetCount() const { - return static_cast(index_and_count_ & ((1 << kCountBits) - 1)); - } - bool operator<(const TrainingFeature& other) const { - return index_and_count_ < other.index_and_count_; - } - - private: - StorageType index_and_count_; -}; - -// Structure that represents one sample of training data -struct Example { - std::vector training_features[2]; - Learner::PackedSfenValue psv; - int sign; - double weight; -}; - -// Message used for setting hyperparameters -struct Message { - Message(const std::string& name, const std::string& value = ""): - name(name), value(value), num_peekers(0), num_receivers(0) {} - const std::string name; - const std::string value; - std::uint32_t num_peekers; - std::uint32_t num_receivers; -}; - -// determine whether to accept the message -bool ReceiveMessage(const std::string& name, Message* message) { - const auto subscript = "[" + std::to_string(message->num_peekers) + "]"; - if (message->name.substr(0, name.size() + 1) == name + "[") { - ++message->num_peekers; - } - if (message->name == name || message->name == name + subscript) { - ++message->num_receivers; - return true; - } - return false; -} - -// split the string -std::vector Split(const std::string& input, char delimiter) { - std::istringstream stream(input); - std::string field; - std::vector fields; - while (std::getline(stream, field, delimiter)) { - fields.push_back(field); - } - return fields; -} - -// round a floating point number to an integer -template -IntType Round(double value) { - return static_cast(std::floor(value + 0.5)); -} - -// make_shared with alignment -template -std::shared_ptr MakeAlignedSharedPtr(ArgumentTypes&&... 
arguments) { - const auto ptr = new(aligned_malloc(sizeof(T), alignof(T))) - T(std::forward(arguments)...); - return std::shared_ptr(ptr, AlignedDeleter()); -} - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/trainer/trainer_affine_transform.h b/src/eval/nnue/trainer/trainer_affine_transform.h deleted file mode 100644 index f5b208a3..00000000 --- a/src/eval/nnue/trainer/trainer_affine_transform.h +++ /dev/null @@ -1,301 +0,0 @@ -// Specialization of NNUE evaluation function learning class template for AffineTransform - -#ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_ -#define _NNUE_TRAINER_AFFINE_TRANSFORM_H_ - -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#include "../../../learn/learn.h" -#include "../layers/affine_transform.h" -#include "trainer.h" - -#include - -namespace Eval { - -namespace NNUE { - -// Learning: Affine transformation layer -template -class Trainer> { - private: - // Type of layer to learn - using LayerType = Layers::AffineTransform; - - public: - // factory function - static std::shared_ptr Create( - LayerType* target_layer, FeatureTransformer* feature_transformer) { - return std::shared_ptr( - new Trainer(target_layer, feature_transformer)); - } - - // Set options such as hyperparameters - void SendMessage(Message* message) { - previous_layer_trainer_->SendMessage(message); - if (ReceiveMessage("momentum", message)) { - momentum_ = static_cast(std::stod(message->value)); - } - if (ReceiveMessage("learning_rate_scale", message)) { - learning_rate_scale_ = - static_cast(std::stod(message->value)); - } - if (ReceiveMessage("reset", message)) { - DequantizeParameters(); - } - if (ReceiveMessage("quantize_parameters", message)) { - QuantizeParameters(); - } - } - - // Initialize the parameters with random numbers - template - void Initialize(RNG& rng) { - previous_layer_trainer_->Initialize(rng); - if (kIsOutputLayer) { - // Initialize output layer with 0 - 
std::fill(std::begin(biases_), std::end(biases_), - static_cast(0.0)); - std::fill(std::begin(weights_), std::end(weights_), - static_cast(0.0)); - } else { - // Assuming that the input distribution is unit-mean 0.5, equal variance, - // Initialize the output distribution so that each unit has a mean of 0.5 and the same variance as the input - const double kSigma = 1.0 / std::sqrt(kInputDimensions); - auto distribution = std::normal_distribution(0.0, kSigma); - for (IndexType i = 0; i < kOutputDimensions; ++i) { - double sum = 0.0; - for (IndexType j = 0; j < kInputDimensions; ++j) { - const auto weight = static_cast(distribution(rng)); - weights_[kInputDimensions * i + j] = weight; - sum += weight; - } - biases_[i] = static_cast(0.5 - 0.5 * sum); - } - } - QuantizeParameters(); - } - - // forward propagation - const LearnFloatType* Propagate(const std::vector& batch) { - if (output_.size() < kOutputDimensions * batch.size()) { - output_.resize(kOutputDimensions * batch.size()); - gradients_.resize(kInputDimensions * batch.size()); - } - batch_size_ = static_cast(batch.size()); - batch_input_ = previous_layer_trainer_->Propagate(batch); -#if defined(USE_BLAS) - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType batch_offset = kOutputDimensions * b; - cblas_scopy(kOutputDimensions, biases_, 1, &output_[batch_offset], 1); - } - cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, - kOutputDimensions, batch_size_, kInputDimensions, 1.0, - weights_, kInputDimensions, - batch_input_, kInputDimensions, - 1.0, &output_[0], kOutputDimensions); -#else - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType input_batch_offset = kInputDimensions * b; - const IndexType output_batch_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kOutputDimensions; ++i) { - double sum = biases_[i]; - for (IndexType j = 0; j < kInputDimensions; ++j) { - const IndexType index = kInputDimensions * i + j; - sum += weights_[index] * batch_input_[input_batch_offset 
+ j]; - } - output_[output_batch_offset + i] = static_cast(sum); - } - } -#endif - return output_.data(); - } - - // backpropagation - void Backpropagate(const LearnFloatType* gradients, - LearnFloatType learning_rate) { - const LearnFloatType local_learning_rate = - learning_rate * learning_rate_scale_; -#if defined(USE_BLAS) - // backpropagate - cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - kInputDimensions, batch_size_, kOutputDimensions, 1.0, - weights_, kInputDimensions, - gradients, kOutputDimensions, - 0.0, &gradients_[0], kInputDimensions); - // update - cblas_sscal(kOutputDimensions, momentum_, biases_diff_, 1); - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType batch_offset = kOutputDimensions * b; - cblas_saxpy(kOutputDimensions, 1.0, - &gradients[batch_offset], 1, biases_diff_, 1); - } - cblas_saxpy(kOutputDimensions, -local_learning_rate, - biases_diff_, 1, biases_, 1); - cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans, - kOutputDimensions, kInputDimensions, batch_size_, 1.0, - gradients, kOutputDimensions, - batch_input_, kInputDimensions, - momentum_, weights_diff_, kInputDimensions); - cblas_saxpy(kOutputDimensions * kInputDimensions, -local_learning_rate, - weights_diff_, 1, weights_, 1); -#else - // backpropagate - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType input_batch_offset = kInputDimensions * b; - const IndexType output_batch_offset = kOutputDimensions * b; - for (IndexType j = 0; j < kInputDimensions; ++j) { - double sum = 0.0; - for (IndexType i = 0; i < kOutputDimensions; ++i) { - const IndexType index = kInputDimensions * i + j; - sum += weights_[index] * gradients[output_batch_offset + i]; - } - gradients_[input_batch_offset + j] = static_cast(sum); - } - } - // update - for (IndexType i = 0; i < kOutputDimensions; ++i) { - biases_diff_[i] *= momentum_; - } - for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) { - weights_diff_[i] *= momentum_; - } - for (IndexType b = 0; 
b < batch_size_; ++b) { - const IndexType input_batch_offset = kInputDimensions * b; - const IndexType output_batch_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kOutputDimensions; ++i) { - biases_diff_[i] += gradients[output_batch_offset + i]; - } - for (IndexType i = 0; i < kOutputDimensions; ++i) { - for (IndexType j = 0; j < kInputDimensions; ++j) { - const IndexType index = kInputDimensions * i + j; - weights_diff_[index] += gradients[output_batch_offset + i] * - batch_input_[input_batch_offset + j]; - } - } - } - for (IndexType i = 0; i < kOutputDimensions; ++i) { - biases_[i] -= local_learning_rate * biases_diff_[i]; - } - for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) { - weights_[i] -= local_learning_rate * weights_diff_[i]; - } -#endif - previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate); - } - - private: - // constructor - Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) : - batch_size_(0), - batch_input_(nullptr), - previous_layer_trainer_(Trainer::Create( - &target_layer->previous_layer_, feature_transformer)), - target_layer_(target_layer), - biases_(), - weights_(), - biases_diff_(), - weights_diff_(), - momentum_(0.0), - learning_rate_scale_(1.0) { - DequantizeParameters(); - } - - // Weight saturation and parameterization - void QuantizeParameters() { - for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) { - weights_[i] = std::max(-kMaxWeightMagnitude, - std::min(+kMaxWeightMagnitude, weights_[i])); - } - for (IndexType i = 0; i < kOutputDimensions; ++i) { - target_layer_->biases_[i] = - Round(biases_[i] * kBiasScale); - } - for (IndexType i = 0; i < kOutputDimensions; ++i) { - const auto offset = kInputDimensions * i; - const auto padded_offset = LayerType::kPaddedInputDimensions * i; - for (IndexType j = 0; j < kInputDimensions; ++j) { - target_layer_->weights_[padded_offset + j] = - Round( - weights_[offset + j] * kWeightScale); - } - } - } - - // 
read parameterized integer - void DequantizeParameters() { - for (IndexType i = 0; i < kOutputDimensions; ++i) { - biases_[i] = static_cast( - target_layer_->biases_[i] / kBiasScale); - } - for (IndexType i = 0; i < kOutputDimensions; ++i) { - const auto offset = kInputDimensions * i; - const auto padded_offset = LayerType::kPaddedInputDimensions * i; - for (IndexType j = 0; j < kInputDimensions; ++j) { - weights_[offset + j] = static_cast( - target_layer_->weights_[padded_offset + j] / kWeightScale); - } - } - std::fill(std::begin(biases_diff_), std::end(biases_diff_), - static_cast(0.0)); - std::fill(std::begin(weights_diff_), std::end(weights_diff_), - static_cast(0.0)); - } - - // number of input/output dimensions - static constexpr IndexType kInputDimensions = LayerType::kInputDimensions; - static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions; - - // If the output dimensionality is 1, the output layer - static constexpr bool kIsOutputLayer = kOutputDimensions == 1; - - // Coefficient used for parameterization - static constexpr LearnFloatType kActivationScale = - std::numeric_limits::max(); - static constexpr LearnFloatType kBiasScale = kIsOutputLayer ? 
- (kPonanzaConstant * FV_SCALE) : - ((1 << kWeightScaleBits) * kActivationScale); - static constexpr LearnFloatType kWeightScale = kBiasScale / kActivationScale; - - // Upper limit of absolute value of weight used to prevent overflow when parameterizing integers - static constexpr LearnFloatType kMaxWeightMagnitude = - std::numeric_limits::max() / kWeightScale; - - // number of samples in mini-batch - IndexType batch_size_; - - // Input mini batch - const LearnFloatType* batch_input_; - - // Trainer of the previous layer - const std::shared_ptr> previous_layer_trainer_; - - // layer to learn - LayerType* const target_layer_; - - // parameter - LearnFloatType biases_[kOutputDimensions]; - LearnFloatType weights_[kOutputDimensions * kInputDimensions]; - - // Buffer used for updating parameters - LearnFloatType biases_diff_[kOutputDimensions]; - LearnFloatType weights_diff_[kOutputDimensions * kInputDimensions]; - - // Forward propagation buffer - std::vector output_; - - // buffer for back propagation - std::vector gradients_; - - // hyper parameter - LearnFloatType momentum_; - LearnFloatType learning_rate_scale_; -}; - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/trainer/trainer_clipped_relu.h b/src/eval/nnue/trainer/trainer_clipped_relu.h deleted file mode 100644 index 566ed777..00000000 --- a/src/eval/nnue/trainer/trainer_clipped_relu.h +++ /dev/null @@ -1,142 +0,0 @@ -// Specialization of NNUE evaluation function learning class template for ClippedReLU - -#ifndef _NNUE_TRAINER_CLIPPED_RELU_H_ -#define _NNUE_TRAINER_CLIPPED_RELU_H_ - -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#include "../../../learn/learn.h" -#include "../layers/clipped_relu.h" -#include "trainer.h" - -namespace Eval { - -namespace NNUE { - -// Learning: Affine transformation layer -template -class Trainer> { - private: - // Type of layer to learn - using LayerType = Layers::ClippedReLU; - - public: 
- // factory function - static std::shared_ptr Create( - LayerType* target_layer, FeatureTransformer* feature_transformer) { - return std::shared_ptr( - new Trainer(target_layer, feature_transformer)); - } - - // Set options such as hyperparameters - void SendMessage(Message* message) { - previous_layer_trainer_->SendMessage(message); - if (ReceiveMessage("check_health", message)) { - CheckHealth(); - } - } - - // Initialize the parameters with random numbers - template - void Initialize(RNG& rng) { - previous_layer_trainer_->Initialize(rng); - } - - // forward propagation - const LearnFloatType* Propagate(const std::vector& batch) { - if (output_.size() < kOutputDimensions * batch.size()) { - output_.resize(kOutputDimensions * batch.size()); - gradients_.resize(kInputDimensions * batch.size()); - } - const auto input = previous_layer_trainer_->Propagate(batch); - batch_size_ = static_cast(batch.size()); - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kOutputDimensions; ++i) { - const IndexType index = batch_offset + i; - output_[index] = std::max(+kZero, std::min(+kOne, input[index])); - min_activations_[i] = std::min(min_activations_[i], output_[index]); - max_activations_[i] = std::max(max_activations_[i], output_[index]); - } - } - return output_.data(); - } - - // backpropagation - void Backpropagate(const LearnFloatType* gradients, - LearnFloatType learning_rate) { - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kOutputDimensions; ++i) { - const IndexType index = batch_offset + i; - gradients_[index] = gradients[index] * - (output_[index] > kZero) * (output_[index] < kOne); - } - } - previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate); - } - - private: - // constructor - Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) : - batch_size_(0), - 
previous_layer_trainer_(Trainer::Create( - &target_layer->previous_layer_, feature_transformer)), - target_layer_(target_layer) { - std::fill(std::begin(min_activations_), std::end(min_activations_), - std::numeric_limits::max()); - std::fill(std::begin(max_activations_), std::end(max_activations_), - std::numeric_limits::lowest()); - } - - // Check if there are any problems with learning - void CheckHealth() { - const auto largest_min_activation = *std::max_element( - std::begin(min_activations_), std::end(min_activations_)); - const auto smallest_max_activation = *std::min_element( - std::begin(max_activations_), std::end(max_activations_)); - std::cout << "INFO: largest min activation = " << largest_min_activation - << ", smallest max activation = " << smallest_max_activation - << std::endl; - - std::fill(std::begin(min_activations_), std::end(min_activations_), - std::numeric_limits::max()); - std::fill(std::begin(max_activations_), std::end(max_activations_), - std::numeric_limits::lowest()); - } - - // number of input/output dimensions - static constexpr IndexType kInputDimensions = LayerType::kOutputDimensions; - static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions; - - // LearnFloatType constant - static constexpr LearnFloatType kZero = static_cast(0.0); - static constexpr LearnFloatType kOne = static_cast(1.0); - - // number of samples in mini-batch - IndexType batch_size_; - - // Trainer of the previous layer - const std::shared_ptr> previous_layer_trainer_; - - // layer to learn - LayerType* const target_layer_; - - // Forward propagation buffer - std::vector output_; - - // buffer for back propagation - std::vector gradients_; - - // Health check statistics - LearnFloatType min_activations_[kOutputDimensions]; - LearnFloatType max_activations_[kOutputDimensions]; -}; - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#endif diff --git 
a/src/eval/nnue/trainer/trainer_feature_transformer.h b/src/eval/nnue/trainer/trainer_feature_transformer.h deleted file mode 100644 index 0139d534..00000000 --- a/src/eval/nnue/trainer/trainer_feature_transformer.h +++ /dev/null @@ -1,377 +0,0 @@ -// Specialization for feature transformer of learning class template of NNUE evaluation function - -#ifndef _NNUE_TRAINER_FEATURE_TRANSFORMER_H_ -#define _NNUE_TRAINER_FEATURE_TRANSFORMER_H_ - -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#include "../../../learn/learn.h" -#include "../nnue_feature_transformer.h" -#include "trainer.h" -#include "features/factorizer_feature_set.h" - -#include -#include -#include -#include -#include - -#if defined(_OPENMP) -#include -#endif - -namespace Eval { - -namespace NNUE { - -// Learning: Input feature converter -template <> -class Trainer { - private: - // Type of layer to learn - using LayerType = FeatureTransformer; - - public: - template - friend struct AlignedDeleter; - template - friend std::shared_ptr MakeAlignedSharedPtr(ArgumentTypes&&... 
arguments); - - // factory function - static std::shared_ptr Create(LayerType* target_layer) { - return MakeAlignedSharedPtr(target_layer); - } - - // Set options such as hyperparameters - void SendMessage(Message* message) { - if (ReceiveMessage("momentum", message)) { - momentum_ = static_cast(std::stod(message->value)); - } - if (ReceiveMessage("learning_rate_scale", message)) { - learning_rate_scale_ = - static_cast(std::stod(message->value)); - } - if (ReceiveMessage("reset", message)) { - DequantizeParameters(); - } - if (ReceiveMessage("quantize_parameters", message)) { - QuantizeParameters(); - } - if (ReceiveMessage("clear_unobserved_feature_weights", message)) { - ClearUnobservedFeatureWeights(); - } - if (ReceiveMessage("check_health", message)) { - CheckHealth(); - } - } - - // Initialize the parameters with random numbers - template - void Initialize(RNG& rng) { - std::fill(std::begin(weights_), std::end(weights_), +kZero); - const double kSigma = 0.1 / std::sqrt(RawFeatures::kMaxActiveDimensions); - auto distribution = std::normal_distribution(0.0, kSigma); - for (IndexType i = 0; i < kHalfDimensions * RawFeatures::kDimensions; ++i) { - const auto weight = static_cast(distribution(rng)); - weights_[i] = weight; - } - for (IndexType i = 0; i < kHalfDimensions; ++i) { - biases_[i] = static_cast(0.5); - } - QuantizeParameters(); - } - - // forward propagation - const LearnFloatType* Propagate(const std::vector& batch) { - if (output_.size() < kOutputDimensions * batch.size()) { - output_.resize(kOutputDimensions * batch.size()); - gradients_.resize(kOutputDimensions * batch.size()); - } - batch_ = &batch; - // affine transform -#pragma omp parallel for - for (IndexType b = 0; b < batch.size(); ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType c = 0; c < 2; ++c) { - const IndexType output_offset = batch_offset + kHalfDimensions * c; -#if defined(USE_BLAS) - cblas_scopy(kHalfDimensions, biases_, 1, &output_[output_offset], 1); 
- for (const auto& feature : batch[b].training_features[c]) { - const IndexType weights_offset = kHalfDimensions * feature.GetIndex(); - cblas_saxpy(kHalfDimensions, (float)feature.GetCount(), - &weights_[weights_offset], 1, &output_[output_offset], 1); - } -#else - for (IndexType i = 0; i < kHalfDimensions; ++i) { - output_[output_offset + i] = biases_[i]; - } - for (const auto& feature : batch[b].training_features[c]) { - const IndexType weights_offset = kHalfDimensions * feature.GetIndex(); - for (IndexType i = 0; i < kHalfDimensions; ++i) { - output_[output_offset + i] += - feature.GetCount() * weights_[weights_offset + i]; - } - } -#endif - } - } - // clipped ReLU - for (IndexType b = 0; b < batch.size(); ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kOutputDimensions; ++i) { - const IndexType index = batch_offset + i; - min_pre_activation_ = std::min(min_pre_activation_, output_[index]); - max_pre_activation_ = std::max(max_pre_activation_, output_[index]); - output_[index] = std::max(+kZero, std::min(+kOne, output_[index])); - const IndexType t = i % kHalfDimensions; - min_activations_[t] = std::min(min_activations_[t], output_[index]); - max_activations_[t] = std::max(max_activations_[t], output_[index]); - } - } - return output_.data(); - } - - // backpropagation - void Backpropagate(const LearnFloatType* gradients, - LearnFloatType learning_rate) { - const LearnFloatType local_learning_rate = - learning_rate * learning_rate_scale_; - for (IndexType b = 0; b < batch_->size(); ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kOutputDimensions; ++i) { - const IndexType index = batch_offset + i; - gradients_[index] = gradients[index] * - ((output_[index] > kZero) * (output_[index] < kOne)); - } - } - // Since the weight matrix updates only the columns corresponding to the features that appeared in the input, - // Correct the learning rate and adjust the scale without using 
momentum - const LearnFloatType effective_learning_rate = - static_cast(local_learning_rate / (1.0 - momentum_)); -#if defined(USE_BLAS) - cblas_sscal(kHalfDimensions, momentum_, biases_diff_, 1); - for (IndexType b = 0; b < batch_->size(); ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType c = 0; c < 2; ++c) { - const IndexType output_offset = batch_offset + kHalfDimensions * c; - cblas_saxpy(kHalfDimensions, 1.0, - &gradients_[output_offset], 1, biases_diff_, 1); - } - } - cblas_saxpy(kHalfDimensions, -local_learning_rate, - biases_diff_, 1, biases_, 1); -#pragma omp parallel - { -#if defined(_OPENMP) - const IndexType num_threads = omp_get_num_threads(); - const IndexType thread_index = omp_get_thread_num(); -#endif - for (IndexType b = 0; b < batch_->size(); ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType c = 0; c < 2; ++c) { - const IndexType output_offset = batch_offset + kHalfDimensions * c; - for (const auto& feature : (*batch_)[b].training_features[c]) { -#if defined(_OPENMP) - if (feature.GetIndex() % num_threads != thread_index) continue; -#endif - const IndexType weights_offset = - kHalfDimensions * feature.GetIndex(); - const auto scale = static_cast( - effective_learning_rate / feature.GetCount()); - cblas_saxpy(kHalfDimensions, -scale, - &gradients_[output_offset], 1, - &weights_[weights_offset], 1); - } - } - } - } -#else - for (IndexType i = 0; i < kHalfDimensions; ++i) { - biases_diff_[i] *= momentum_; - } - for (IndexType b = 0; b < batch_->size(); ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType c = 0; c < 2; ++c) { - const IndexType output_offset = batch_offset + kHalfDimensions * c; - for (IndexType i = 0; i < kHalfDimensions; ++i) { - biases_diff_[i] += gradients_[output_offset + i]; - } - } - } - for (IndexType i = 0; i < kHalfDimensions; ++i) { - biases_[i] -= local_learning_rate * biases_diff_[i]; - } - for (IndexType b = 0; b < batch_->size(); ++b) 
{ - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType c = 0; c < 2; ++c) { - const IndexType output_offset = batch_offset + kHalfDimensions * c; - for (const auto& feature : (*batch_)[b].training_features[c]) { - const IndexType weights_offset = kHalfDimensions * feature.GetIndex(); - const auto scale = static_cast( - effective_learning_rate / feature.GetCount()); - for (IndexType i = 0; i < kHalfDimensions; ++i) { - weights_[weights_offset + i] -= - scale * gradients_[output_offset + i]; - } - } - } - } -#endif - for (IndexType b = 0; b < batch_->size(); ++b) { - for (IndexType c = 0; c < 2; ++c) { - for (const auto& feature : (*batch_)[b].training_features[c]) { - observed_features.set(feature.GetIndex()); - } - } - } - } - - private: - // constructor - Trainer(LayerType* target_layer) : - batch_(nullptr), - target_layer_(target_layer), - biases_(), - weights_(), - biases_diff_(), - momentum_(0.0), - learning_rate_scale_(1.0) { - min_pre_activation_ = std::numeric_limits::max(); - max_pre_activation_ = std::numeric_limits::lowest(); - std::fill(std::begin(min_activations_), std::end(min_activations_), - std::numeric_limits::max()); - std::fill(std::begin(max_activations_), std::end(max_activations_), - std::numeric_limits::lowest()); - DequantizeParameters(); - } - - // Weight saturation and parameterization - void QuantizeParameters() { - for (IndexType i = 0; i < kHalfDimensions; ++i) { - target_layer_->biases_[i] = - Round(biases_[i] * kBiasScale); - } - std::vector training_features; -#pragma omp parallel for private(training_features) - for (IndexType j = 0; j < RawFeatures::kDimensions; ++j) { - training_features.clear(); - Features::Factorizer::AppendTrainingFeatures( - j, &training_features); - for (IndexType i = 0; i < kHalfDimensions; ++i) { - double sum = 0.0; - for (const auto& feature : training_features) { - sum += weights_[kHalfDimensions * feature.GetIndex() + i]; - } - target_layer_->weights_[kHalfDimensions * j + i] = - 
Round(sum * kWeightScale); - } - } - } - - // read parameterized integer - void DequantizeParameters() { - for (IndexType i = 0; i < kHalfDimensions; ++i) { - biases_[i] = static_cast( - target_layer_->biases_[i] / kBiasScale); - } - std::fill(std::begin(weights_), std::end(weights_), +kZero); - for (IndexType i = 0; i < kHalfDimensions * RawFeatures::kDimensions; ++i) { - weights_[i] = static_cast( - target_layer_->weights_[i] / kWeightScale); - } - std::fill(std::begin(biases_diff_), std::end(biases_diff_), +kZero); - } - - // Set the weight corresponding to the feature that does not appear in the learning data to 0 - void ClearUnobservedFeatureWeights() { - for (IndexType i = 0; i < kInputDimensions; ++i) { - if (!observed_features.test(i)) { - std::fill(std::begin(weights_) + kHalfDimensions * i, - std::begin(weights_) + kHalfDimensions * (i + 1), +kZero); - } - } - QuantizeParameters(); - } - - // Check if there are any problems with learning - void CheckHealth() { - std::cout << "INFO: observed " << observed_features.count() - << " (out of " << kInputDimensions << ") features" << std::endl; - - constexpr LearnFloatType kPreActivationLimit = - std::numeric_limits::max() / - kWeightScale; - std::cout << "INFO: (min, max) of pre-activations = " - << min_pre_activation_ << ", " - << max_pre_activation_ << " (limit = " - << kPreActivationLimit << ")" << std::endl; - - const auto largest_min_activation = *std::max_element( - std::begin(min_activations_), std::end(min_activations_)); - const auto smallest_max_activation = *std::min_element( - std::begin(max_activations_), std::end(max_activations_)); - std::cout << "INFO: largest min activation = " << largest_min_activation - << ", smallest max activation = " << smallest_max_activation - << std::endl; - - std::fill(std::begin(min_activations_), std::end(min_activations_), - std::numeric_limits::max()); - std::fill(std::begin(max_activations_), std::end(max_activations_), - std::numeric_limits::lowest()); - } - - // 
number of input/output dimensions - static constexpr IndexType kInputDimensions = - Features::Factorizer::GetDimensions(); - static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions; - static constexpr IndexType kHalfDimensions = LayerType::kHalfDimensions; - - // Coefficient used for parameterization - static constexpr LearnFloatType kActivationScale = - std::numeric_limits::max(); - static constexpr LearnFloatType kBiasScale = kActivationScale; - static constexpr LearnFloatType kWeightScale = kActivationScale; - - // LearnFloatType constant - static constexpr LearnFloatType kZero = static_cast(0.0); - static constexpr LearnFloatType kOne = static_cast(1.0); - - // mini batch - const std::vector* batch_; - - // layer to learn - LayerType* const target_layer_; - - // parameter - alignas(kCacheLineSize) LearnFloatType biases_[kHalfDimensions]; - alignas(kCacheLineSize) - LearnFloatType weights_[kHalfDimensions * kInputDimensions]; - - // Buffer used for updating parameters - LearnFloatType biases_diff_[kHalfDimensions]; - std::vector gradients_; - - // Forward propagation buffer - std::vector output_; - - // Features that appeared in the training data - std::bitset observed_features; - - // hyper parameter - LearnFloatType momentum_; - LearnFloatType learning_rate_scale_; - - // Health check statistics - LearnFloatType min_pre_activation_; - LearnFloatType max_pre_activation_; - LearnFloatType min_activations_[kHalfDimensions]; - LearnFloatType max_activations_[kHalfDimensions]; -}; - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/trainer/trainer_input_slice.h b/src/eval/nnue/trainer/trainer_input_slice.h deleted file mode 100644 index f5b263d3..00000000 --- a/src/eval/nnue/trainer/trainer_input_slice.h +++ /dev/null @@ -1,251 +0,0 @@ -// Specialization of NNUE evaluation function learning class template for InputSlice - -#ifndef _NNUE_TRAINER_INPUT_SLICE_H_ 
-#define _NNUE_TRAINER_INPUT_SLICE_H_ - -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#include "../../../learn/learn.h" -#include "../layers/input_slice.h" -#include "trainer.h" - -namespace Eval { - -namespace NNUE { - -// Learning: Input layer -class SharedInputTrainer { - public: - // factory function - static std::shared_ptr Create( - FeatureTransformer* feature_transformer) { - static std::shared_ptr instance; - if (!instance) { - instance.reset(new SharedInputTrainer(feature_transformer)); - } - ++instance->num_referrers_; - return instance; - } - - // Set options such as hyperparameters - void SendMessage(Message* message) { - if (num_calls_ == 0) { - current_operation_ = Operation::kSendMessage; - feature_transformer_trainer_->SendMessage(message); - } - assert(current_operation_ == Operation::kSendMessage); - if (++num_calls_ == num_referrers_) { - num_calls_ = 0; - current_operation_ = Operation::kNone; - } - } - - // Initialize the parameters with random numbers - template - void Initialize(RNG& rng) { - if (num_calls_ == 0) { - current_operation_ = Operation::kInitialize; - feature_transformer_trainer_->Initialize(rng); - } - assert(current_operation_ == Operation::kInitialize); - if (++num_calls_ == num_referrers_) { - num_calls_ = 0; - current_operation_ = Operation::kNone; - } - } - - // forward propagation - const LearnFloatType* Propagate(const std::vector& batch) { - if (gradients_.size() < kInputDimensions * batch.size()) { - gradients_.resize(kInputDimensions * batch.size()); - } - batch_size_ = static_cast(batch.size()); - if (num_calls_ == 0) { - current_operation_ = Operation::kPropagate; - output_ = feature_transformer_trainer_->Propagate(batch); - } - assert(current_operation_ == Operation::kPropagate); - if (++num_calls_ == num_referrers_) { - num_calls_ = 0; - current_operation_ = Operation::kNone; - } - return output_; - } - - // backpropagation - void Backpropagate(const LearnFloatType* gradients, - LearnFloatType learning_rate) { - 
if (num_referrers_ == 1) { - feature_transformer_trainer_->Backpropagate(gradients, learning_rate); - return; - } - if (num_calls_ == 0) { - current_operation_ = Operation::kBackPropagate; - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType batch_offset = kInputDimensions * b; - for (IndexType i = 0; i < kInputDimensions; ++i) { - gradients_[batch_offset + i] = static_cast(0.0); - } - } - } - assert(current_operation_ == Operation::kBackPropagate); - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType batch_offset = kInputDimensions * b; - for (IndexType i = 0; i < kInputDimensions; ++i) { - gradients_[batch_offset + i] += gradients[batch_offset + i]; - } - } - if (++num_calls_ == num_referrers_) { - feature_transformer_trainer_->Backpropagate( - gradients_.data(), learning_rate); - num_calls_ = 0; - current_operation_ = Operation::kNone; - } - } - - private: - // constructor - SharedInputTrainer(FeatureTransformer* feature_transformer) : - batch_size_(0), - num_referrers_(0), - num_calls_(0), - current_operation_(Operation::kNone), - feature_transformer_trainer_(Trainer::Create( - feature_transformer)), - output_(nullptr) { - } - - // number of input/output dimensions - static constexpr IndexType kInputDimensions = - FeatureTransformer::kOutputDimensions; - - // type of processing - enum class Operation { - kNone, - kSendMessage, - kInitialize, - kPropagate, - kBackPropagate, - }; - - // number of samples in mini-batch - IndexType batch_size_; - - // number of layers sharing this layer as input - std::uint32_t num_referrers_; - - // Number of times the current process has been called - std::uint32_t num_calls_; - - // current processing type - Operation current_operation_; - - // Trainer of input feature converter - const std::shared_ptr> - feature_transformer_trainer_; - - // pointer to output shared for forward propagation - const LearnFloatType* output_; - - // buffer for back propagation - std::vector gradients_; -}; - -// Learning: 
Input layer -template -class Trainer> { - private: - // Type of layer to learn - using LayerType = Layers::InputSlice; - - public: - // factory function - static std::shared_ptr Create( - LayerType* /*target_layer*/, FeatureTransformer* feature_transformer) { - return std::shared_ptr(new Trainer(feature_transformer)); - } - - // Set options such as hyperparameters - void SendMessage(Message* message) { - shared_input_trainer_->SendMessage(message); - } - - // Initialize the parameters with random numbers - template - void Initialize(RNG& rng) { - shared_input_trainer_->Initialize(rng); - } - - // forward propagation - const LearnFloatType* Propagate(const std::vector& batch) { - if (output_.size() < kOutputDimensions * batch.size()) { - output_.resize(kOutputDimensions * batch.size()); - gradients_.resize(kInputDimensions * batch.size()); - } - batch_size_ = static_cast(batch.size()); - const auto input = shared_input_trainer_->Propagate(batch); - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType input_offset = kInputDimensions * b; - const IndexType output_offset = kOutputDimensions * b; -#if defined(USE_BLAS) - cblas_scopy(kOutputDimensions, &input[input_offset + Offset], 1, - &output_[output_offset], 1); -#else - for (IndexType i = 0; i < kOutputDimensions; ++i) { - output_[output_offset + i] = input[input_offset + Offset + i]; - } -#endif - } - return output_.data(); - } - - // backpropagation - void Backpropagate(const LearnFloatType* gradients, - LearnFloatType learning_rate) { - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType input_offset = kInputDimensions * b; - const IndexType output_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kInputDimensions; ++i) { - if (i < Offset || i >= Offset + kOutputDimensions) { - gradients_[input_offset + i] = static_cast(0.0); - } else { - gradients_[input_offset + i] = gradients[output_offset + i - Offset]; - } - } - } - shared_input_trainer_->Backpropagate(gradients_.data(), 
learning_rate); - } - - private: - // constructor - Trainer(FeatureTransformer* feature_transformer): - batch_size_(0), - shared_input_trainer_(SharedInputTrainer::Create(feature_transformer)) { - } - - // number of input/output dimensions - static constexpr IndexType kInputDimensions = - FeatureTransformer::kOutputDimensions; - static constexpr IndexType kOutputDimensions = OutputDimensions; - static_assert(Offset + kOutputDimensions <= kInputDimensions, ""); - - // number of samples in mini-batch - IndexType batch_size_; - - // Trainer of shared input layer - const std::shared_ptr shared_input_trainer_; - - // Forward propagation buffer - std::vector output_; - - // buffer for back propagation - std::vector gradients_; -}; - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#endif diff --git a/src/eval/nnue/trainer/trainer_sum.h b/src/eval/nnue/trainer/trainer_sum.h deleted file mode 100644 index 2efdff67..00000000 --- a/src/eval/nnue/trainer/trainer_sum.h +++ /dev/null @@ -1,190 +0,0 @@ -// Specialization of NNUE evaluation function learning class template for Sum - -#ifndef _NNUE_TRAINER_SUM_H_ -#define _NNUE_TRAINER_SUM_H_ - -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#include "../../../learn/learn.h" -#include "../layers/sum.h" -#include "trainer.h" - -namespace Eval { - -namespace NNUE { - -// Learning: A layer that sums the outputs of multiple layers -template -class Trainer> : - Trainer> { - private: - // Type of layer to learn - using LayerType = Layers::Sum; - using Tail = Trainer>; - - public: - // factory function - static std::shared_ptr Create( - LayerType* target_layer, FeatureTransformer* feature_transformer) { - return std::shared_ptr( - new Trainer(target_layer, feature_transformer)); - } - - // Set options such as hyperparameters - void SendMessage(Message* message) { - // The results of other member functions do not depend on the processing order, so - // Tail is processed first for the 
purpose of simplifying the implementation, but - // SendMessage processes Head first to make it easier to understand subscript correspondence - previous_layer_trainer_->SendMessage(message); - Tail::SendMessage(message); - } - - // Initialize the parameters with random numbers - template - void Initialize(RNG& rng) { - Tail::Initialize(rng); - previous_layer_trainer_->Initialize(rng); - } - - // forward propagation - /*const*/ LearnFloatType* Propagate(const std::vector& batch) { - batch_size_ = static_cast(batch.size()); - auto output = Tail::Propagate(batch); - const auto head_output = previous_layer_trainer_->Propagate(batch); -#if defined(USE_BLAS) - cblas_saxpy(kOutputDimensions * batch_size_, 1.0, - head_output, 1, output, 1); -#else - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kOutputDimensions; ++i) { - output[batch_offset + i] += head_output[batch_offset + i]; - } - } -#endif - return output; - } - - // backpropagation - void Backpropagate(const LearnFloatType* gradients, - LearnFloatType learning_rate) { - Tail::Backpropagate(gradients, learning_rate); - previous_layer_trainer_->Backpropagate(gradients, learning_rate); - } - - private: - // constructor - Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer): - Tail(target_layer, feature_transformer), - batch_size_(0), - previous_layer_trainer_(Trainer::Create( - &target_layer->previous_layer_, feature_transformer)), - target_layer_(target_layer) { - } - - // number of input/output dimensions - static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions; - - // make subclass friend - template - friend class Trainer; - - // number of samples in mini-batch - IndexType batch_size_; - - // Trainer of the previous layer - const std::shared_ptr> previous_layer_trainer_; - - // layer to learn - LayerType* const target_layer_; -}; - - -// Learning: Layer that takes the sum of the outputs of 
multiple layers (when there is one template argument) -template -class Trainer> { - private: - // Type of layer to learn - using LayerType = Layers::Sum; - - public: - // factory function - static std::shared_ptr Create( - LayerType* target_layer, FeatureTransformer* feature_transformer) { - return std::shared_ptr( - new Trainer(target_layer, feature_transformer)); - } - - // Set options such as hyperparameters - void SendMessage(Message* message) { - previous_layer_trainer_->SendMessage(message); - } - - // Initialize the parameters with random numbers - template - void Initialize(RNG& rng) { - previous_layer_trainer_->Initialize(rng); - } - - // forward propagation - /*const*/ LearnFloatType* Propagate(const std::vector& batch) { - if (output_.size() < kOutputDimensions * batch.size()) { - output_.resize(kOutputDimensions * batch.size()); - } - batch_size_ = static_cast(batch.size()); - const auto output = previous_layer_trainer_->Propagate(batch); -#if defined(USE_BLAS) - cblas_scopy(kOutputDimensions * batch_size_, output, 1, &output_[0], 1); -#else - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType batch_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kOutputDimensions; ++i) { - output_[batch_offset + i] = output[batch_offset + i]; - } - } -#endif - return output_.data(); - } - - // backpropagation - void Backpropagate(const LearnFloatType* gradients, - LearnFloatType learning_rate) { - previous_layer_trainer_->Backpropagate(gradients, learning_rate); - } - - private: - // constructor - Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) : - batch_size_(0), - previous_layer_trainer_(Trainer::Create( - &target_layer->previous_layer_, feature_transformer)), - target_layer_(target_layer) { - } - - // number of input/output dimensions - static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions; - - // make subclass friend - template - friend class Trainer; - - // number of samples in mini-batch - 
IndexType batch_size_; - - // Trainer of the previous layer - const std::shared_ptr> previous_layer_trainer_; - - // layer to learn - LayerType* const target_layer_; - - // Forward propagation buffer - std::vector output_; -}; - -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) - -#endif diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 12ecff00..1c59f821 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -22,7 +22,6 @@ #include #include // For std::memset #include -#include #include #include "bitboard.h" @@ -30,7 +29,6 @@ #include "material.h" #include "pawns.h" #include "thread.h" -#include "eval/nnue/evaluate_nnue.h" namespace Trace { @@ -76,7 +74,8 @@ using namespace Trace; namespace { // Threshold for lazy and space evaluation - constexpr Value LazyThreshold = Value(1400); + constexpr Value LazyThreshold1 = Value(1400); + constexpr Value LazyThreshold2 = Value(1300); constexpr Value SpaceThreshold = Value(12222); // KingAttackWeights[PieceType] contains king attack weights by piece type @@ -788,7 +787,7 @@ namespace { && pos.non_pawn_material(BLACK) == RookValueMg && pos.count(strongSide) - pos.count(~strongSide) <= 1 && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN)) - && (attackedBy[~strongSide][KING] & pos.pieces(~strongSide, PAWN))) + && (attacks_bb(pos.square(~strongSide)) & pos.pieces(~strongSide, PAWN))) sf = 36; else if (pos.count() == 1) sf = 37 + 3 * (pos.count(WHITE) == 1 ? pos.count(BLACK) + pos.count(BLACK) @@ -839,9 +838,12 @@ namespace { score += pe->pawn_score(WHITE) - pe->pawn_score(BLACK); // Early exit if score is high - Value v = (mg_value(score) + eg_value(score)) / 2; - if (abs(v) > LazyThreshold + pos.non_pawn_material() / 64) - return pos.side_to_move() == WHITE ? 
v : -v; + auto lazy_skip = [&](Value lazyThreshold) { + return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64; + }; + + if (lazy_skip(LazyThreshold1)) + goto make_v; // Main evaluation begins here initialize(); @@ -858,12 +860,17 @@ namespace { // More complex interactions that require fully populated attack bitboards score += king< WHITE>() - king< BLACK>() - + threats() - threats() - + passed< WHITE>() - passed< BLACK>() + + passed< WHITE>() - passed< BLACK>(); + + if (lazy_skip(LazyThreshold2)) + goto make_v; + + score += threats() - threats() + space< WHITE>() - space< BLACK>(); +make_v: // Derive single value from mg and eg parts of score - v = winnable(score); + Value v = winnable(score); // In case of tracing add all remaining individual evaluation terms if (T) @@ -892,12 +899,12 @@ namespace { /// evaluate() is the evaluator for the outer world. It returns a static /// evaluation of the position from the point of view of the side to move. -#if !defined(EVAL_NNUE) Value Eval::evaluate(const Position& pos) { - return Evaluation(pos).value(); + if (pos.use_nnue()) + return NNUE::evaluate(pos); + else + return Evaluation(pos).value(); } -#endif // defined(EVAL_NNUE) - /// trace() is like evaluate(), but instead of returning a value, it returns /// a string (suitable for outputting to stdout) that contains the detailed @@ -941,138 +948,3 @@ std::string Eval::trace(const Position& pos) { return ss.str(); } - -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) -namespace Eval { -ExtBonaPiece kpp_board_index[PIECE_NB] = { - { BONA_PIECE_ZERO, BONA_PIECE_ZERO }, - { f_pawn, e_pawn }, - { f_knight, e_knight }, - { f_bishop, e_bishop }, - { f_rook, e_rook }, - { f_queen, e_queen }, - { f_king, e_king }, - { BONA_PIECE_ZERO, BONA_PIECE_ZERO }, - - // When viewed from behind. f and e are exchanged. 
- { BONA_PIECE_ZERO, BONA_PIECE_ZERO }, - { e_pawn, f_pawn }, - { e_knight, f_knight }, - { e_bishop, f_bishop }, - { e_rook, f_rook }, - { e_queen, f_queen }, - { e_king, f_king }, - { BONA_PIECE_ZERO, BONA_PIECE_ZERO }, // no money -}; - -// Check whether the pieceListFw[] held internally is a correct BonaPiece. -// Note: For debugging. slow. -bool EvalList::is_valid(const Position& pos) -{ - std::set piece_numbers; - for (Square sq = SQ_A1; sq != SQUARE_NB; ++sq) { - auto piece_number = piece_no_of_board(sq); - if (piece_number == PIECE_NUMBER_NB) { - continue; - } - assert(!piece_numbers.count(piece_number)); - piece_numbers.insert(piece_number); - } - - for (int i = 0; i < length(); ++i) - { - BonaPiece fw = pieceListFw[i]; - // Go to the Position class to see if this fw really exists. - - if (fw == Eval::BONA_PIECE_ZERO) { - continue; - } - - // Out of range - if (!(0 <= fw && fw < fe_end)) - return false; - - // Since it is a piece on the board, I will check if this piece really exists. - for (Piece pc = NO_PIECE; pc < PIECE_NB; ++pc) - { - auto pt = type_of(pc); - if (pt == NO_PIECE_TYPE || pt == 7) // non-existing piece - continue; - - // BonaPiece start number of piece pc - auto s = BonaPiece(kpp_board_index[pc].fw); - if (s <= fw && fw < s + SQUARE_NB) - { - // Since it was found, check if this piece is at sq. - Square sq = (Square)(fw - s); - Piece pc2 = pos.piece_on(sq); - - if (pc2 != pc) - return false; - - goto Found; - } - } - // It was a piece that did not exist for some reason.. 
- return false; - Found:; - } - - // Validate piece_no_list_board - for (auto sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq) { - Piece expected_piece = pos.piece_on(sq); - PieceNumber piece_number = piece_no_list_board[sq]; - if (piece_number == PIECE_NUMBER_NB) { - assert(expected_piece == NO_PIECE); - if (expected_piece != NO_PIECE) { - return false; - } - continue; - } - - BonaPiece bona_piece_white = pieceListFw[piece_number]; - Piece actual_piece; - for (actual_piece = NO_PIECE; actual_piece < PIECE_NB; ++actual_piece) { - if (kpp_board_index[actual_piece].fw == BONA_PIECE_ZERO) { - continue; - } - - if (kpp_board_index[actual_piece].fw <= bona_piece_white - && bona_piece_white < kpp_board_index[actual_piece].fw + SQUARE_NB) { - break; - } - } - - assert(actual_piece != PIECE_NB); - if (actual_piece == PIECE_NB) { - return false; - } - - assert(actual_piece == expected_piece); - if (actual_piece != expected_piece) { - return false; - } - - Square actual_square = static_cast( - bona_piece_white - kpp_board_index[actual_piece].fw); - assert(sq == actual_square); - if (sq != actual_square) { - return false; - } - } - - return true; -} -} -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) - -#if !defined(EVAL_NNUE) -namespace Eval { -void evaluate_with_no_return(const Position& pos) {} -void update_weights(uint64_t epoch, const std::array & freeze) {} -void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3) {} -void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array & freeze) {} -void save_eval(std::string suffix) {} -double get_eta() { return 0.0; } -} -#endif // defined(EVAL_NNUE) diff --git a/src/evaluate.h b/src/evaluate.h index 0301f455..942cccbd 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -30,193 +30,17 @@ class Position; namespace Eval { std::string trace(const Position& pos); - Value evaluate(const Position& pos); -void evaluate_with_no_return(const Position& pos); +namespace NNUE { +Value 
evaluate(const Position& pos); Value compute_eval(const Position& pos); +void update_eval(const Position& pos); +void load_eval(const std::string& evalFile); -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) -// Read the evaluation function file. -// This is only called once in response to the "is_ready" command. It is not supposed to be called twice. -// (However, if isready is sent again after EvalDir (evaluation function folder) has been changed, read it again.) -void load_eval(); +} // namespace NNUE -static uint64_t calc_check_sum() {return 0;} - -static void print_softname(uint64_t check_sum) {} - -// --- enum corresponding to P of constant KPP (ball and arbitrary 2 pieces) used in evaluation function - -// (BonaPiece wants to define freely in experiment of evaluation function, so I don't define it here.) - - -// A type that represents P(Piece) when calling KKP/KPP in Bonanza. -// When you ask for KPP, you need a unique number for each box ~ piece type, like the step at 39 points. -enum BonaPiece : int32_t -{ - // Meaning of f = friend (first move). Meaning of e = enemy (rear) - - // Value when uninitialized - BONA_PIECE_NOT_INIT = -1, - - // Invalid piece. When you drop a piece, move unnecessary pieces here. - BONA_PIECE_ZERO = 0, - - fe_hand_end = BONA_PIECE_ZERO + 1, - - // Don't pack the numbers of unrealistic walks and incense on the board like Bonanza. - // Reason 1) When learning, there are times when the incense is on the first stage in relative PP, and it is difficult to display it correctly in the inverse transformation. - // Reason 2) It is difficult to convert from Square with vertical Bitboard. 
- - // --- Pieces on the board - f_pawn = fe_hand_end, - e_pawn = f_pawn + SQUARE_NB, - f_knight = e_pawn + SQUARE_NB, - e_knight = f_knight + SQUARE_NB, - f_bishop = e_knight + SQUARE_NB, - e_bishop = f_bishop + SQUARE_NB, - f_rook = e_bishop + SQUARE_NB, - e_rook = f_rook + SQUARE_NB, - f_queen = e_rook + SQUARE_NB, - e_queen = f_queen + SQUARE_NB, - fe_end = e_queen + SQUARE_NB, - f_king = fe_end, - e_king = f_king + SQUARE_NB, - fe_end2 = e_king + SQUARE_NB, // Last number including balls. -}; - -#define ENABLE_INCR_OPERATORS_ON(T) \ -inline T& operator++(T& d) { return d = T(int(d) + 1); } \ -inline T& operator--(T& d) { return d = T(int(d) - 1); } - -ENABLE_INCR_OPERATORS_ON(BonaPiece) - -#undef ENABLE_INCR_OPERATORS_ON - -// The number when you look at BonaPiece from the back (the number of steps from the previous 39 to the number 71 from the back) -// Let's call the paired one the ExtBonaPiece type. -union ExtBonaPiece -{ - struct { - BonaPiece fw; // from white - BonaPiece fb; // from black - }; - BonaPiece from[2]; - - ExtBonaPiece() {} - ExtBonaPiece(BonaPiece fw_, BonaPiece fb_) : fw(fw_), fb(fb_) {} -}; - -// Information about where the piece has moved from where to by this move. -// Assume the piece is an ExtBonaPiece expression. -struct ChangedBonaPiece -{ - ExtBonaPiece old_piece; - ExtBonaPiece new_piece; -}; - -// An array for finding the BonaPiece corresponding to the piece pc on the board of the KPP table. -// example) -// BonaPiece fb = kpp_board_index[pc].fb + sq; // BonaPiece corresponding to pc in sq seen from the front -// BonaPiece fw = kpp_board_index[pc].fw + sq; // BonaPiece corresponding to pc in sq seen from behind -extern ExtBonaPiece kpp_board_index[PIECE_NB]; - -// List of pieces used in the evaluation function. 
A structure holding which piece (PieceNumber) is where (BonaPiece) -struct EvalList -{ - // List of frame numbers used in evaluation function (FV38 type) - BonaPiece* piece_list_fw() const { return const_cast(pieceListFw); } - BonaPiece* piece_list_fb() const { return const_cast(pieceListFb); } - - // Convert the specified piece_no piece to ExtBonaPiece type and return it. - ExtBonaPiece bona_piece(PieceNumber piece_no) const - { - ExtBonaPiece bp; - bp.fw = pieceListFw[piece_no]; - bp.fb = pieceListFb[piece_no]; - return bp; - } - - // Place the piece_no pc piece in the sq box on the board - void put_piece(PieceNumber piece_no, Square sq, Piece pc) { - set_piece_on_board(piece_no, BonaPiece(kpp_board_index[pc].fw + sq), BonaPiece(kpp_board_index[pc].fb + Inv(sq)), sq); - } - - // Returns the PieceNumber corresponding to a box on the board. - PieceNumber piece_no_of_board(Square sq) const { return piece_no_list_board[sq]; } - - // Initialize the pieceList. - // Set the value of unused pieces to BONA_PIECE_ZERO in case you want to deal with dropped pieces. - // A normal evaluation function can be used as an evaluation function for missing frames. - // piece_no_list is initialized with PIECE_NUMBER_NB to facilitate debugging. - void clear() - { - - for (auto& p: pieceListFw) - p = BONA_PIECE_ZERO; - - for (auto& p: pieceListFb) - p = BONA_PIECE_ZERO; - - for (auto& v :piece_no_list_board) - v = PIECE_NUMBER_NB; - } - - // Check whether the pieceListFw[] held internally is a correct BonaPiece. - // Note: For debugging. slow. - bool is_valid(const Position& pos); - - // Set that the BonaPiece of the piece_no piece on the board sq is fb,fw. - inline void set_piece_on_board(PieceNumber piece_no, BonaPiece fw, BonaPiece fb, Square sq) - { - assert(is_ok(piece_no)); - pieceListFw[piece_no] = fw; - pieceListFb[piece_no] = fb; - piece_no_list_board[sq] = piece_no; - } - - // Piece list. Piece Number Shows how many pieces are in place (Bona Piece). Used in FV38 etc. 
- - // Length of piece list - // 38 fixed -public: - int length() const { return PIECE_NUMBER_KING; } - - // Must be a multiple of 4 to use VPGATHERDD. - // In addition, the KPPT type evaluation function, etc. is based on the assumption that the 39th and 40th elements are zero. - // Please note that there is a part that is accessed. - static const int MAX_LENGTH = 32; - - // An array that holds the piece number (PieceNumber) for the pieces on the board - // Hold up to +1 for when the ball is moving to SQUARE_NB, - // SQUARE_NB balls are not moved, so this value should never be used. - PieceNumber piece_no_list_board[SQUARE_NB_PLUS1]; -private: - - BonaPiece pieceListFw[MAX_LENGTH]; - BonaPiece pieceListFb[MAX_LENGTH]; -}; - -// For management of evaluation value difference calculation -// A structure for managing the number of pieces that have moved from the previous stage -// Up to 2 moving pieces. -struct DirtyPiece -{ - // What changed from the piece with that piece number - Eval::ChangedBonaPiece changed_piece[2]; - - // The number of dirty pieces - PieceNumber pieceNo[2]; - - // The number of dirty files. - // It can be 0 for null move. - // Up to 2 moving pieces and taken pieces. - int dirty_num; - -}; -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) -} +} // namespace Eval #endif // #ifndef EVALUATE_H_INCLUDED diff --git a/src/extra/sfen_packer.cpp b/src/extra/sfen_packer.cpp deleted file mode 100644 index b3404542..00000000 --- a/src/extra/sfen_packer.cpp +++ /dev/null @@ -1,447 +0,0 @@ -#if defined (EVAL_LEARN) - -#include "../misc.h" -#include "../position.h" - -#include -#include -#include // std::memset() - -using namespace std; - -// ----------------------------------- -// stage compression/decompression -// ----------------------------------- - -// Class that handles bitstream -// useful when doing aspect encoding -struct BitStream -{ - // Set the memory to store the data in advance. - // Assume that memory is cleared to 0. 
- void set_data(uint8_t* data_) { data = data_; reset(); } - - // Get the pointer passed in set_data(). - uint8_t* get_data() const { return data; } - - // Get the cursor. - int get_cursor() const { return bit_cursor; } - - // reset the cursor - void reset() { bit_cursor = 0; } - - // Write 1bit to the stream. - // If b is non-zero, write out 1. If 0, write 0. - void write_one_bit(int b) - { - if (b) - data[bit_cursor / 8] |= 1 << (bit_cursor & 7); - - ++bit_cursor; - } - - // Get 1 bit from the stream. - int read_one_bit() - { - int b = (data[bit_cursor / 8] >> (bit_cursor & 7)) & 1; - ++bit_cursor; - - return b; - } - - // write n bits of data - // Data shall be written out from the lower order of d. - void write_n_bit(int d, int n) - { - for (int i = 0; i = RANK_1; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - { - Piece pc = pos.piece_on(make_square(f, r)); - if (type_of(pc) == KING) - continue; - write_board_piece_to_stream(pc); - } - } - - // TODO(someone): Support chess960. - stream.write_one_bit(pos.can_castle(WHITE_OO)); - stream.write_one_bit(pos.can_castle(WHITE_OOO)); - stream.write_one_bit(pos.can_castle(BLACK_OO)); - stream.write_one_bit(pos.can_castle(BLACK_OOO)); - - if (pos.ep_square() == SQ_NONE) { - stream.write_one_bit(0); - } - else { - stream.write_one_bit(1); - stream.write_n_bit(static_cast(pos.ep_square()), 6); - } - - stream.write_n_bit(pos.state()->rule50, 6); - - stream.write_n_bit(1 + (pos.game_ply()-(pos.side_to_move() == BLACK)) / 2, 8); - - assert(stream.get_cursor() <= 256); - } - - // sfen packed by pack() (256bit = 32bytes) - // Or sfen to decode with unpack() - uint8_t *data; // uint8_t[32]; - -//private: - // Position::set_from_packed_sfen(uint8_t data[32]) I want to use these functions, so the line is bad, but I want to keep it public. - - BitStream stream; - - // Output the board pieces to stream. 
- void write_board_piece_to_stream(Piece pc) - { - // piece type - PieceType pr = type_of(pc); - auto c = huffman_table[pr]; - stream.write_n_bit(c.code, c.bits); - - if (pc == NO_PIECE) - return; - - // first and second flag - stream.write_one_bit(color_of(pc)); - } - - // Read one board piece from stream - Piece read_board_piece_from_stream() - { - PieceType pr = NO_PIECE_TYPE; - int code = 0, bits = 0; - while (true) - { - code |= stream.read_one_bit() << bits; - ++bits; - - assert(bits <= 6); - - for (pr = NO_PIECE_TYPE; pr = RANK_1; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - { - auto sq = make_square(f, r); - if (mirror) { - sq = Mir(sq); - } - - // it seems there are already balls - Piece pc; - if (type_of(board[sq]) != KING) - { - assert(board[sq] == NO_PIECE); - pc = packer.read_board_piece_from_stream(); - } - else - { - pc = board[sq]; - board[sq] = NO_PIECE; // put_piece() will catch ASSERT unless you remove it all. - } - - // There may be no pieces, so skip in that case. - if (pc == NO_PIECE) - continue; - - put_piece(Piece(pc), sq); - - // update evalList - PieceNumber piece_no = - (pc == B_KING) ?PIECE_NUMBER_BKING :// Move ball - (pc == W_KING) ?PIECE_NUMBER_WKING :// Backing ball - next_piece_number++; // otherwise - - evalList.put_piece(piece_no, sq, pc); // Place the pc piece in the sq box - - //cout << sq << ' ' << board[sq] << ' ' << stream.get_cursor() << endl; - - if (stream.get_cursor()> 256) - return 1; - //assert(stream.get_cursor() <= 256); - - } - } - - // Castling availability. - // TODO(someone): Support chess960. 
- st->castlingRights = 0; - if (stream.read_one_bit()) { - Square rsq; - for (rsq = relative_square(WHITE, SQ_H1); piece_on(rsq) != W_ROOK; --rsq) {} - set_castling_right(WHITE, rsq); - } - if (stream.read_one_bit()) { - Square rsq; - for (rsq = relative_square(WHITE, SQ_A1); piece_on(rsq) != W_ROOK; ++rsq) {} - set_castling_right(WHITE, rsq); - } - if (stream.read_one_bit()) { - Square rsq; - for (rsq = relative_square(BLACK, SQ_H1); piece_on(rsq) != B_ROOK; --rsq) {} - set_castling_right(BLACK, rsq); - } - if (stream.read_one_bit()) { - Square rsq; - for (rsq = relative_square(BLACK, SQ_A1); piece_on(rsq) != B_ROOK; ++rsq) {} - set_castling_right(BLACK, rsq); - } - - // En passant square. Ignore if no pawn capture is possible - if (stream.read_one_bit()) { - Square ep_square = static_cast(stream.read_n_bit(6)); - if (mirror) { - ep_square = Mir(ep_square); - } - st->epSquare = ep_square; - - if (!(attackers_to(st->epSquare) & pieces(sideToMove, PAWN)) - || !(pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove)))) - st->epSquare = SQ_NONE; - } - else { - st->epSquare = SQ_NONE; - } - - // Halfmove clock - st->rule50 = static_cast(stream.read_n_bit(6)); - - // Fullmove number - gamePly = static_cast(stream.read_n_bit(8)); - // Convert from fullmove starting from 1 to gamePly starting from 0, - // handle also common incorrect FEN with fullmove = 0. - gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK); - - assert(stream.get_cursor() <= 256); - - chess960 = false; - thisThread = th; -set_state(st); - - //std::cout << *this << std::endl; - - assert(pos_is_ok()); -#if defined(EVAL_NNUE) - assert(evalList.is_valid(*this)); -#endif // defined(EVAL_NNUE) - - return 0; -} - -// Give the board, hand piece, and turn, and return the sfen. 
-//std::string Position::sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly_) -//{ -// // Copy it to an internal structure and call sfen() if the conversion process depends only on it -// // Maybe it will be converted normally... -// Position pos; -// -// memcpy(pos.board, board, sizeof(Piece) * 81); -// memcpy(pos.hand, hands, sizeof(Hand) * 2); -// pos.sideToMove = turn; -// pos.gamePly = gamePly_; -// -// return pos.sfen(); -// -// // Implementation of ↑ is beautiful, but slow. -// // This is a bottleneck when learning a large amount of game records, so write a function to unpack directly. -//} - -// Get the packed sfen. Returns to the buffer specified in the argument. -void Position::sfen_pack(PackedSfen& sfen) -{ - SfenPacker sp; - sp.data = (uint8_t*)&sfen; - sp.pack(*this); -} - -//// Unpack the packed sfen. Returns an sfen string. -//std::string Position::sfen_unpack(const PackedSfen& sfen) -//{ -// SfenPacker sp; -// sp.data = (uint8_t*)&sfen; -// return sp.unpack(); -//} - - -#endif // USE_SFEN_PACKER diff --git a/src/learn/gensfen2019.cpp b/src/learn/gensfen2019.cpp deleted file mode 100644 index 01293b9c..00000000 --- a/src/learn/gensfen2019.cpp +++ /dev/null @@ -1 +0,0 @@ -// just a place holder diff --git a/src/learn/half_float.h b/src/learn/half_float.h deleted file mode 100644 index 30b3e482..00000000 --- a/src/learn/half_float.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef __HALF_FLOAT_H__ -#define __HALF_FLOAT_H__ - -// Half Float Library by yaneurao -// (16-bit float) - -// Floating point operation by 16bit type -// Assume that the float type code generated by the compiler is in IEEE 754 format and use it. 
- -#include "../types.h" - -namespace HalfFloat -{ - // IEEE 754 float 32 format is : - // sign(1bit) + exponent(8bits) + fraction(23bits) = 32bits - // - // Our float16 format is : - // sign(1bit) + exponent(5bits) + fraction(10bits) = 16bits - union float32_converter - { - int32_t n; - float f; - }; - - - // 16-bit float - struct float16 - { - // --- constructors - - float16() {} - float16(int16_t n) { from_float((float)n); } - float16(int32_t n) { from_float((float)n); } - float16(float n) { from_float(n); } - float16(double n) { from_float((float)n); } - - // build from a float - void from_float(float f) { *this = to_float16(f); } - - // --- implicit converters - - operator int32_t() const { return (int32_t)to_float(*this); } - operator float() const { return to_float(*this); } - operator double() const { return double(to_float(*this)); } - - // --- operators - - float16 operator += (float16 rhs) { from_float(to_float(*this) + to_float(rhs)); return *this; } - float16 operator -= (float16 rhs) { from_float(to_float(*this) - to_float(rhs)); return *this; } - float16 operator *= (float16 rhs) { from_float(to_float(*this) * to_float(rhs)); return *this; } - float16 operator /= (float16 rhs) { from_float(to_float(*this) / to_float(rhs)); return *this; } - float16 operator + (float16 rhs) const { return float16(*this) += rhs; } - float16 operator - (float16 rhs) const { return float16(*this) -= rhs; } - float16 operator * (float16 rhs) const { return float16(*this) *= rhs; } - float16 operator / (float16 rhs) const { return float16(*this) /= rhs; } - float16 operator - () const { return float16(-to_float(*this)); } - bool operator == (float16 rhs) const { return this->v_ == rhs.v_; } - bool operator != (float16 rhs) const { return !(*this == rhs); } - - static void UnitTest() { unit_test(); } - - private: - - // --- entity - - uint16_t v_; - - // --- conversion between float and float16 - - static float16 to_float16(float f) - { - float32_converter c; - c.f = f; - 
u32 n = c.n; - - // The sign bit is MSB in common. - uint16_t sign_bit = (n >> 16) & 0x8000; - - // The exponent of IEEE 754's float 32 is biased +127 , so we change this bias into +15 and limited to 5-bit. - uint16_t exponent = (((n >> 23) - 127 + 15) & 0x1f) << 10; - - // The fraction is limited to 10-bit. - uint16_t fraction = (n >> (23-10)) & 0x3ff; - - float16 f_; - f_.v_ = sign_bit | exponent | fraction; - - return f_; - } - - static float to_float(float16 v) - { - u32 sign_bit = (v.v_ & 0x8000) << 16; - u32 exponent = ((((v.v_ >> 10) & 0x1f) - 15 + 127) & 0xff) << 23; - u32 fraction = (v.v_ & 0x3ff) << (23 - 10); - - float32_converter c; - c.n = sign_bit | exponent | fraction; - return c.f; - } - - // It is not a unit test, but I confirmed that it can be calculated. I'll fix the code later (maybe). - static void unit_test() - { - float16 a, b, c, d; - a = 1; - std::cout << (float)a << std::endl; - b = -118.625; - std::cout << (float)b << std::endl; - c = 2.5; - std::cout << (float)c << std::endl; - d = a + c; - std::cout << (float)d << std::endl; - - c *= 1.5; - std::cout << (float)c << std::endl; - - b /= 3; - std::cout << (float)b << std::endl; - - float f1 = 1.5; - a += f1; - std::cout << (float)a << std::endl; - - a += f1 * (float)a; - std::cout << (float)a << std::endl; - } - - }; - -} - -#endif // __HALF_FLOAT_H__ diff --git a/src/learn/learn.h b/src/learn/learn.h deleted file mode 100644 index eda2bb32..00000000 --- a/src/learn/learn.h +++ /dev/null @@ -1,237 +0,0 @@ -#ifndef _LEARN_H_ -#define _LEARN_H_ - -#if defined(EVAL_LEARN) - -#include - -// ===================== -// Settings for learning -// ===================== - -// If you select one of the following, the details after that will be automatically selected. -// If you don't select any of them, you need to set the subsequent details one by one. - -// Learning setting by elmo method. This is the default setting. 
-// To make a standard squeeze diaphragm, specify "lambda 1" with the learn command. -#define LEARN_ELMO_METHOD - - -// ---------------------- -// update formula -// ---------------------- - -// Ada Grad. Recommended because it is stable. -// #define ADA_GRAD_UPDATE - -// SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is... -// #define SGD_UPDATE - -// ---------------------- -// Settings for learning -// ---------------------- - -// mini-batch size. -// Calculate the gradient by combining this number of phases. -// If you make it smaller, the number of update_weights() will increase and the convergence will be faster. The gradient is incorrect. -// If you increase it, the number of update_weights() decreases, so the convergence will be slow. The slope will come out accurately. -// I don't think you need to change this value in most cases. - -#define LEARN_MINI_BATCH_SIZE (1000 * 1000 * 1) - -// The number of phases to read from the file at one time. After reading this much, shuffle. -// It is better to have a certain size, but this number x 40 bytes x 3 times as much memory is consumed. 400MB*3 is consumed in the 10M phase. -// Must be a multiple of THREAD_BUFFER_SIZE(=10000). - -#define LEARN_SFEN_READ_SIZE (1000 * 1000 * 10) - -// Saving interval of evaluation function at learning. Save each time you learn this number of phases. -// Needless to say, the longer the saving interval, the shorter the learning time. -// Folder name is incremented for each save like 0/, 1/, 2/... -// By default, once every 1 billion phases. -#define LEARN_EVAL_SAVE_INTERVAL (1000000000ULL) - - -// ---------------------- -// Select the objective function -// ---------------------- - -// The objective function is the sum of squares of the difference in winning percentage -// See learner.cpp for more information. - -//#define LOSS_FUNCTION_IS_WINNING_PERCENTAGE - -// Objective function is cross entropy -// See learner.cpp for more information. 
-// So-called ordinary "rag cloth squeezer" -//#define LOSS_FUNCTION_IS_CROSS_ENTOROPY - -// A version in which the objective function is cross entropy, but the win rate function is not passed -// #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE - -// elmo (WCSC27) method -// #define LOSS_FUNCTION_IS_ELMO_METHOD - -// ※ Other things may be added. - - -// ---------------------- -// debug settings for learning -// ---------------------- - -// Reduce the output of rmse during learning to 1 for this number of times. -// rmse calculation is done in one thread, so it takes some time, so reducing the output is effective. -#define LEARN_RMSE_OUTPUT_INTERVAL 1 - - -// ---------------------- -// learning from zero vector -// ---------------------- - -// Start learning the evaluation function parameters from the zero vector. -// Initialize to zero, generate a game, learn from zero vector, -// Game generation → If you repeat learning, you will get parameters that do not depend on the professional game. (maybe) -// (very time consuming) - -//#define RESET_TO_ZERO_VECTOR - - -// ---------------------- -// Floating point for learning -// ---------------------- - -// If this is set to double, the calculation accuracy will be higher, but the weight array entangled memory will be doubled. -// Currently, if this is float, the weight array is 4.5 times the size of the evaluation function file. (About 4.5GB with KPPT) -// Even if it is a double type, there is almost no difference in the way of convergence, so fix it to float. - -// when using float -typedef float LearnFloatType; - -// when using double -//typedef double LearnFloatType; - -// when using float16 -//#include "half_float.h" -//typedef HalfFloat::float16 LearnFloatType; - -// ---------------------- -// save memory -// ---------------------- - -// Use a triangular array for the Weight array (of which is KPP) to save memory. 
-// If this is used, the weight array for learning will be about 3 times as large as the evaluation function file. - -#define USE_TRIANGLE_WEIGHT_ARRAY - -// ---------------------- -// dimension down -// ---------------------- - -// Dimension reduction for mirrors (left/right symmetry) and inverse (forward/backward symmetry). -// All on by default. - -// Dimension reduction using mirror and inverse for KK. (Unclear effect) -// USE_KK_MIRROR_WRITE must be on when USE_KK_INVERSE_WRITE is on. -#define USE_KK_MIRROR_WRITE -#define USE_KK_INVERSE_WRITE - -// Dimension reduction using Mirror and Inverse for KKP. (Inverse is not so effective) -// When USE_KKP_INVERSE_WRITE is turned on, USE_KKP_MIRROR_WRITE must also be turned on. -#define USE_KKP_MIRROR_WRITE -#define USE_KKP_INVERSE_WRITE - -// Perform dimension reduction using a mirror for KPP. (Turning this off requires double the teacher position) -// KPP has no inverse. (Because there is only K on the front side) -#define USE_KPP_MIRROR_WRITE - -// Perform a dimension reduction using a mirror for KPPP. (Turning this off requires double the teacher position) -// KPPP has no inverse. (Because there is only K on the front side) -#define USE_KPPP_MIRROR_WRITE - -// Reduce the dimension by KPP for learning the KKPP component. -// Learning is very slow. -// Do not use as it is not debugged. -//#define USE_KKPP_LOWER_DIM - - -// ====================== -// Settings for creating teacher phases -// ====================== - -// ---------------------- -// write out the draw -// ---------------------- - -// When you reach a draw, write it out as a teacher position -// It's subtle whether it's better to do this. 
-// #define LEARN_GENSFEN_USE_DRAW_RESULT - - -// ====================== -// configure -// ====================== - -// ---------------------- -// Learning with the method of elmo (WCSC27) -// ---------------------- - -#if defined( LEARN_ELMO_METHOD ) -#define LOSS_FUNCTION_IS_ELMO_METHOD -#define ADA_GRAD_UPDATE -#endif - - -// ---------------------- -// Definition of struct used in Learner -// ---------------------- -#include "../position.h" - -namespace Learner -{ - //Structure in which PackedSfen and evaluation value are integrated - // If you write different contents for each option, it will be a problem when reusing the teacher game - // For the time being, write all the following members regardless of the options. - struct PackedSfenValue - { - // phase - PackedSfen sfen; - - // Evaluation value returned from Learner::search() - int16_t score; - - // PV first move - // Used when finding the match rate with the teacher - uint16_t move; - - // Trouble of the phase from the initial phase. - uint16_t gamePly; - - // 1 if the player on this side ultimately wins the game. -1 if you are losing. - // 0 if a draw is reached. - // The draw is in the teacher position generation command gensfen, - // Only write if LEARN_GENSFEN_DRAW_RESULT is enabled. - int8_t game_result; - - // When exchanging the file that wrote the teacher aspect with other people - //Because this structure size is not fixed, pad it so that it is 40 bytes in any environment. - uint8_t padding; - - // 32 + 2 + 2 + 2 + 1 + 1 = 40bytes - }; - - // Type that returns the reading line and the evaluation value at that time - // Used in Learner::search(), Learner::qsearch(). - typedef std::pair > ValueAndPV; - - // So far, only Yaneura King 2018 Otafuku has this stub - // This stub is required if EVAL_LEARN is defined. 
- extern Learner::ValueAndPV search(Position& pos, int depth , size_t multiPV = 1 , uint64_t NodesLimit = 0); - extern Learner::ValueAndPV qsearch(Position& pos); - - double calc_grad(Value shallow, const PackedSfenValue& psv); - -} - -#endif - -#endif // ifndef _LEARN_H_ diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp deleted file mode 100644 index e343fde5..00000000 --- a/src/learn/learner.cpp +++ /dev/null @@ -1,3269 +0,0 @@ -// learning routines -// -// 1) Automatic generation of game records -// → "gensfen" command -// 2) Learning evaluation function parameters from the generated game record -// → "learn" command -// → Shuffle in the teacher phase is also an extension of this command. -// Example) "learn shuffle" -// 3) Automatic generation of fixed traces -// → "makebook think" command -// → implemented in extra/book/book.cpp -// 4) Post-station automatic review mode -// → I will not be involved in the engine because it is a problem that the GUI should assist. -// etc.. - -#if defined(EVAL_LEARN) - -#include -#include -#include - -#include "learn.h" -#include "multi_think.h" -#include "../uci.h" - -// evaluate header for learning -#include "../eval/evaluate_common.h" - -// ---------------------- -// constant string based on the settings -// ---------------------- - -// Character string according to update formula. (Output for debugging.) -// Implemented various update expressions, but concluded that AdaGrad is the best in terms of speed and memory. 
-#if defined(ADA_GRAD_UPDATE) -#define LEARN_UPDATE "AdaGrad" -#elif defined(SGD_UPDATE) -#define LEARN_UPDATE "SGD" -#endif - -#if defined(LOSS_FUNCTION_IS_WINNING_PERCENTAGE) -#define LOSS_FUNCTION "WINNING_PERCENTAGE" -#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) -#define LOSS_FUNCTION "CROSS_ENTOROPY" -#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) -#define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE" -#elif defined(LOSS_FUNCTION_IS_ELMO_METHOD) -#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" -#endif - -// ----------------------------------- -// Below, the implementation section. -// ----------------------------------- - -#include -#include -#include -#include -#include -#include // std::exp(),std::pow(),std::log() -#include // memcpy() - -#if defined (_OPENMP) -#include -#endif - -#if defined(_MSC_VER) -// The C++ filesystem cannot be used unless it is C++17 or later or MSVC. -// I tried to use windows.h, but with g++ of msys2 I can not get the files in the folder well. -// Use dirent.h because there is no help for it. -#include -#elif defined(__GNUC__) -#include -#endif - -#include "../misc.h" -#include "../thread.h" -#include "../position.h" -//#include "../extra/book/book.h" -#include "../tt.h" -#include "multi_think.h" - -#if defined(EVAL_NNUE) -#include "../eval/nnue/evaluate_nnue_learner.h" -#include -#endif - -using namespace std; - -//// This is defined in the search section. -//extern Book::BookMoveSelector book; - -// Addition and subtraction definition for atomic -// Aligned with atomicAdd() in Apery/learner.hpp. -template -T operator += (std::atomic& x, const T rhs) -{ - T old = x.load(std::memory_order_consume); - // It is allowed that the value is rewritten from other thread at this timing. - // The idea that the value is not destroyed is good. 
- T desired = old + rhs; - while (!x.compare_exchange_weak(old, desired, std::memory_order_release, std::memory_order_consume)) - desired = old + rhs; - return desired; -} -template -T operator -= (std::atomic& x, const T rhs) { return x += -rhs; } - -namespace Learner -{ - -// Phase array: PSVector stands for packed sfen vector. -typedef std::vector PSVector; - -bool use_draw_in_training_data_generation = false; -bool use_draw_in_training = false; -bool use_draw_in_validation = false; -bool use_hash_in_training = true; - -// ----------------------------------- -// write phase file -// ----------------------------------- - -// Helper class for exporting Sfen -struct SfenWriter -{ - // File name to write and number of threads to create - SfenWriter(string filename, int thread_num) - { - sfen_buffers_pool.reserve((size_t)thread_num * 10); - sfen_buffers.resize(thread_num); - - // When performing additional learning, the quality of the teacher generated after learning the evaluation function does not change much and I want to earn more teacher positions. - // Since it is preferable that old teachers also use it, it has such a specification. - fs.open(filename, ios::out | ios::binary | ios::app); - filename_ = filename; - - finished = false; - } - - ~SfenWriter() - { - finished = true; - file_worker_thread.join(); - fs.close(); - - // all buffers should be empty since file_worker_thread has written all.. - for (auto p : sfen_buffers) { assert(p == nullptr); } - assert(sfen_buffers_pool.empty()); - } - - // For each thread, flush the file by this number of phases. - const size_t SFEN_WRITE_SIZE = 5000; - - // write one by pairing the phase and evaluation value (in packed sfen format) - void write(size_t thread_id, const PackedSfenValue& psv) - { - // We have a buffer for each thread and add it there. - // If the buffer overflows, write it to a file. - - // This buffer is prepared for each thread. 
- auto& buf = sfen_buffers[thread_id]; - - // Secure since there is no buf at the first time and immediately after writing the thread buffer. - if (!buf) - { - buf = new PSVector(); - buf->reserve(SFEN_WRITE_SIZE); - } - - // It is prepared for each thread, so one thread does not call this write() function at the same time. - // There is no need to exclude at this point. - buf->push_back(psv); - - if (buf->size() >= SFEN_WRITE_SIZE) - { - // If you load it in sfen_buffers_pool, the worker will do the rest. - - // Mutex lock is required when changing the contents of sfen_buffers_pool. - std::unique_lock lk(mutex); - sfen_buffers_pool.push_back(buf); - - buf = nullptr; - // If you set buf == nullptr, the buffer will be allocated the next time this function is called. - } - } - - // Move what remains in the buffer for your thread to a buffer for writing to a file. - void finalize(size_t thread_id) - { - std::unique_lock lk(mutex); - - auto& buf = sfen_buffers[thread_id]; - - // There is a case that buf==nullptr, so that check is necessary. - if (buf && buf->size() != 0) - sfen_buffers_pool.push_back(buf); - - buf = nullptr; - } - - // Start the write_worker thread. - void start_file_write_worker() - { - file_worker_thread = std::thread([&] { this->file_write_worker(); }); - } - - // Dedicated thread to write to file - void file_write_worker() - { - auto output_status = [&]() - { - // also output the current time - sync_cout << endl << sfen_write_count << " sfens , at " << now_string() << sync_endl; - - // This is enough for flush(). 
- fs.flush(); - }; - - while (!finished || sfen_buffers_pool.size()) - { - vector buffers; - { - std::unique_lock lk(mutex); - - // copy the whole - buffers = sfen_buffers_pool; - sfen_buffers_pool.clear(); - } - - // sleep() if you didn't get anything - if (!buffers.size()) - sleep(100); - else - { - for (auto ptr : buffers) - { - fs.write((const char*)&((*ptr)[0]), sizeof(PackedSfenValue) * ptr->size()); - - sfen_write_count += ptr->size(); - -#if 1 - // Add the processed number here, and if it exceeds save_every, change the file name and reset this counter. - save_every_counter += ptr->size(); - if (save_every_counter >= save_every) - { - save_every_counter = 0; - // Change the file name. - - fs.close(); - - // Sequential number attached to the file - int n = (int)(sfen_write_count / save_every); - // Rename the file and open it again. Add ios::app in consideration of overwriting. (Depending on the operation, it may not be necessary.) - string filename = filename_ + "_" + std::to_string(n); - fs.open(filename, ios::out | ios::binary | ios::app); - cout << endl << "output sfen file = " << filename << endl; - } -#endif - - // Output'.' every time when writing a game record. - std::cout << "."; - - // Output the number of phases processed every 40 times - // Finally, the remainder of the teacher phase of each thread is written out, so halfway numbers are displayed, but is it okay? - // If you overuse the threads to the maximum number of logical cores, the console will be clogged, so it may be a little more loose. - if ((++time_stamp_count % 40) == 0) - output_status(); - - // Since this memory is unnecessary, release it at this timing. - delete ptr; - } - } - } - - // Output the time stamp again before the end. - output_status(); - } - - // Change the file name in this unit. 
- uint64_t save_every = UINT64_MAX; - -private: - - fstream fs; - - // File name passed in the constructor - std::string filename_; - - // Add the processed number here, and if it exceeds save_every, change the file name and reset this counter. - uint64_t save_every_counter = 0; - - // thread to write to the file - std::thread file_worker_thread; - // Flag that all threads have finished - atomic finished; - - // Counter for time stamp output - uint64_t time_stamp_count = 0; - - // buffer before writing to file - // sfen_buffers is the buffer for each thread - // sfen_buffers_pool is a buffer for writing. - // After loading the phase in the former buffer by SFEN_WRITE_SIZE, transfer it to the latter. - std::vector sfen_buffers; - std::vector sfen_buffers_pool; - - // Mutex required to access sfen_buffers_pool - std::mutex mutex; - - // number of written phases - uint64_t sfen_write_count = 0; -}; - -// ----------------------------------- -// worker that creates the game record (for each thread) -// ----------------------------------- - -// Class to generate sfen with multiple threads -struct MultiThinkGenSfen : public MultiThink -{ - MultiThinkGenSfen(int search_depth_, int search_depth2_, SfenWriter& sw_) - : search_depth(search_depth_), search_depth2(search_depth2_), sw(sw_) - { - hash.resize(GENSFEN_HASH_SIZE); - - // Output for confirmation if the same random seed is not drawn when parallelizing and gensfening the PC. - std::cout << prng << std::endl; - } - - virtual void thread_worker(size_t thread_id); - void start_file_write_worker() { sw.start_file_write_worker(); } - - // search_depth = search depth for normal search - int search_depth; - int search_depth2; - - // Number of the nodes to be searched. - // 0 represents no limits. 
- uint64_t nodes; - - // Upper limit of evaluation value of generated situation - int eval_limit; - - // minimum ply with random move - int random_move_minply; - // maximum ply with random move - int random_move_maxply; - // Number of random moves in one station - int random_move_count; - // Move balls with a probability of 1/N when randomly moving like Apery. - // When you move the ball again, there is a 1/N chance that it will randomly move once in the opponent's number. - // Apery has N=2. Specifying 0 here disables this function. - int random_move_like_apery; - - // For when using multi pv instead of random move. - // random_multi_pv is the number of candidates for MultiPV. - // When adopting the move of the candidate move, the difference between the evaluation value of the move of the 1st place and the evaluation value of the move of the Nth place is - // Must be in the range random_multi_pv_diff. - // random_multi_pv_depth is the search depth for MultiPV. - int random_multi_pv; - int random_multi_pv_diff; - int random_multi_pv_depth; - - // The minimum and maximum ply (number of steps from the initial phase) of the phase to write out. - int write_minply; - int write_maxply; - - // sfen exporter - SfenWriter& sw; - - // hash to limit the export of the same phase - // It must be 2**N because it will be used as the mask to calculate hash_index. - static const uint64_t GENSFEN_HASH_SIZE = 64 * 1024 * 1024; - - vector hash; // 64MB*sizeof(HASH_KEY) = 512MB -}; - -// thread_id = 0..Threads.size()-1 -void MultiThinkGenSfen::thread_worker(size_t thread_id) -{ - // For the time being, it will be treated as a draw at the maximum number of steps to write. - const int MAX_PLY2 = write_maxply; - - //Maximum StateInfo + Search PV to advance to leaf buffer - std::vector> states(MAX_PLY2 + MAX_PLY /* == search_depth + α */); - StateInfo si; - - // This move. Use this move to advance the stage. 
- Move m = MOVE_NONE; - - // end flag - bool quit = false; - - // repeat until the specified number of times - while (!quit) - { - // It is necessary to set a dependent thread for Position. - // When parallelizing, Threads (since this is a vector, - // Do the same for up to Threads[0]...Threads[thread_num-1]. - auto th = Threads[thread_id]; - - auto& pos = th->rootPos; - pos.set(StartFEN, false, &si, th); - - // Test cod for Packed SFEN. - //{ - // PackedSfen packed_sfen; - // pos.sfen_pack(packed_sfen); - // std::cout << pos << std::endl; - // pos.set_from_packed_sfen(packed_sfen, &si, th); - // std::string actual = pos.fen(); - // assert(actual == StartFEN); - //} - - // Refer to the members of BookMoveSelector defined in the search section. - //auto& book = ::book; - - // Save the situation for one station, and write it out including the winning and losing at the end. - // The function to write is flush_psv() below this. - PSVector a_psv; - a_psv.reserve(MAX_PLY2 + MAX_PLY); - - // Write out the phases loaded in a_psv to a file. - // lastTurnIsWin: win/loss in the next phase after the final phase in a_psv - // 1 when winning. -1 when losing. Pass 0 for a draw. - // Return value: true if the specified number of phases has already been reached and the process ends. - auto flush_psv = [&](int8_t lastTurnIsWin) - { - int8_t isWin = lastTurnIsWin; - - // From the final stage (one step before) to the first stage, give information on the outcome of the game for each stage. - // The phases stored in a_psv are assumed to be continuous (in order). - for (auto it = a_psv.rbegin(); it != a_psv.rend(); ++it) - { - // If isWin == 0 (draw), multiply by -1 and it will remain 0 (draw) - isWin = - isWin; - it->game_result = isWin; - - // When I tried to write out the phase, it reached the specified number of times. - // Because the counter is added in get_next_loop_count() - // If you don't call this when the phase is output, the counter goes crazy. 
- auto loop_count = get_next_loop_count(); - if (loop_count == UINT64_MAX) - { - // Set the end flag. - quit = true; - return; - } - - // Write out one aspect. - sw.write(thread_id, *it); - -#if 0 - pos.set_from_packed_sfen(it->sfen); - cout << pos << "Win : " << it->isWin << " , " << it->score << endl; -#endif - } - }; - - // ply flag for whether or not to randomly move by eyes - vector random_move_flag; - { - // If you want to add a random move, random_move_maxply be sure to enter random_move_count times before the first move. - // I want you to disperse so much. - // I'm not sure how best it is. Experimenting under various conditions. - - // Make an array like a[0] = 0 ,a[1] = 1, ... - // Fisher-Yates shuffle and take out the first N items. - // Actually, I only want N pieces, so I only need to shuffle the first N pieces with Fisher-Yates. - - vector a; - a.reserve((size_t)random_move_maxply); - - // random_move_minply ,random_move_maxply is specified by 1 origin, - // Note that we are handling 0 origin here. - for (int i = std::max(random_move_minply - 1 , 0) ; i < random_move_maxply; ++i) - a.push_back(i); - - // In case of Apery random move, insert() may be called random_move_count times. - // Reserve only the size considering it. - random_move_flag.resize((size_t)random_move_maxply + random_move_count); - - // A random move that exceeds the size() of a[] cannot be applied, so limit it. - for (int i = 0 ; i < std::min(random_move_count, (int)a.size()) ; ++i) - { - swap(a[i], a[prng.rand((uint64_t)a.size() - i) + i]); - random_move_flag[a[i]] = true; - } - } - - // A counter that keeps track of the number of random moves - // When random_move_minply == -1, random moves are performed continuously, so use it at this time. - int random_move_c = 0; - - // ply: steps from the initial stage - for (int ply = 0; ; ++ply) - { - //cout << pos << endl; - - // Current search depth - // Goto will fly, so declare it first. 
- int depth = search_depth + (int)prng.rand(search_depth2 - search_depth + 1); - - // has it reached the length - if (ply >= MAX_PLY2) - { - if (use_draw_in_training_data_generation) { - // Write out as win/loss = draw. - // This way it is harder to allow the opponent to enter the ball when I enter (may) - flush_psv(0); - } - break; - } - - if (pos.is_draw(ply)) { - if (use_draw_in_training_data_generation) { - // Write if draw. - flush_psv(0); - } - break; - } - - // Isn't all pieces stuck and stuck? - if (MoveList(pos).size() == 0) - { - // (write up to the previous phase of this phase) - // Write the positions other than this position if checkmated. - if (pos.checkers()) // Mate - flush_psv(-1); - else if (use_draw_in_training_data_generation) { - flush_psv(0); // Stalemate - } - break; - } - - //// constant track - //if ((m = book.probe(pos)) != MOVE_NONE) - //{ - // // Hit the constant track. - // // The move was stored in m. - - // // Do not use the fixed phase for learning. - // a_psv.clear(); - - // if (random_move_minply != -1) - // // Random move is performed with a certain probability even in the constant phase. - // goto RANDOM_MOVE; - // else - // // When -1 is specified as random_move_minply, it points according to the standard until it goes out of the standard. - // // Prepare an innumerable number of situations that have left the constant as ConsiderationBookMoveCount true using a huge constant - // // Used for purposes such as performing a random move 5 times from there. - // goto DO_MOVE; - //} - - { - // search_depth~search_depth2 Evaluation value of hand reading and PV (best responder row) - // There should be no problem if you narrow the search window. 
- - auto pv_value1 = search(pos, depth, 1, nodes); - - auto value1 = pv_value1.first; - auto& pv1 = pv_value1.second; - - // For situations where the absolute evaluation value is greater than or equal to this value - // It doesn't make much sense to use that aspect for learning, so this game ends. - // Treat this as having won or lost. - - // If you win one move, declarative win, mate_in(2) will be returned here, so it will be the same value as the upper limit of eval_limit, - // This if expression is always true. The same applies to resign. - - if (abs(value1) >= eval_limit) - { - // sync_cout << pos << "eval limit = "<< eval_limit << "over ,move = "<< pv1[0] << sync_endl; - - // If value1 >= eval_limit in this aspect, you win (the turn side of this aspect). - flush_psv((value1 >= eval_limit) ? 1 : -1); - break; - } - - // Verification of a strange move - if (pv1.size() > 0 - && (pv1[0] == MOVE_NONE || pv1[0] == MOVE_NULL) - ) - { - // MOVE_WIN is checking if it is the declaration victory stage before this - // The declarative winning move should never come back here. - // Also, when MOVE_RESIGN, value1 is a one-stop score, which should be the minimum value of eval_limit (-31998)... - cout << "Error! : " << pos.fen() << m << value1 << endl; - break; - } - - // Processing according to each thousand-day hand. - - if (pos.is_draw(0)) { - if (use_draw_in_training_data_generation) { - // Write if draw. - flush_psv(0); - } - break; - } - - // Use PV's move to the leaf node and use the value that evaluated() is called on that leaf node. - auto evaluate_leaf = [&](Position& pos , vector& pv) - { - auto rootColor = pos.side_to_move(); - - int ply2 = ply; - for (auto m : pv) - { - // As a verification for debugging, make sure there are no illegal players in the middle. - // NULL_MOVE does not come. - - // I tested it out enough so I can comment it out. -#if 1 - // I shouldn't be an illegal player. 
- // declarative win and not mated() are tested above so - // It is guaranteed that MOVE_WIN and MOVE_RESIGN do not come as a reader. (Should...) - if (!pos.pseudo_legal(m) || !pos.legal(m)) - { - cout << "Error! : " << pos.fen() << m << endl; - } -#endif - pos.do_move(m, states[ply2++]); - - //Because the difference calculation of evaluate() cannot be performed unless each node evaluate() is called! - // If the depth is 8 or more, it seems faster not to calculate this difference. -#if defined(EVAL_NNUE) - if (depth < 8) - Eval::evaluate_with_no_return(pos); -#endif // defined(EVAL_NNUE) - } - - // reach leaf - // cout << pos; - - auto v = Eval::evaluate(pos); - // evaluate() returns the evaluation value on the turn side, so - // If it's a turn different from root_color, you must invert v and return it. - if (rootColor != pos.side_to_move()) - v = -v; - - // Rewind. - // Is it C++x14, and isn't there even foreach to turn in reverse? - // for (auto it : boost::adaptors::reverse(pv)) - - for (auto it = pv.rbegin(); it != pv.rend(); ++it) - pos.undo_move(*it); - - return v; - }; - -#if 0 - dbg_hit_on(pv_value1.first == leaf_value); - // gensfen depth 3 eval_limit 32000 - // Total 217749 Hits 203579 hit rate (%) 93.490 - // gensfen depth 6 eval_limit 32000 - // Total 78407 Hits 69190 hit rate (%) 88.245 - // gensfen depth 6 eval_limit 3000 - // Total 53879 Hits 43713 hit rate (%) 81.132 - - // Problems such as pruning with moves in the substitution table. - // This is a little uncomfortable as a teacher... -#endif - - //If depth 0, pv is not obtained, so search again at depth 2. - if (search_depth <= 0) - { - pv_value1 = search(pos, 2); - pv1 = pv_value1.second; - } - - // The surroundings of the initial stage are all similar - // Do not write it out because it can lead to overlearning when used for learning. - // → comparative experiment should be done - if (ply < write_minply - 1) - { - a_psv.clear(); - goto SKIP_SAVE; - } - - // Did you just write the same phase? 
- // This may include the same aspect as it is generated in parallel on multiple PCs, so - // It is better to do the same process when reading. - { - auto key = pos.key(); - auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1)); - auto key2 = hash[hash_index]; - if (key == key2) - { - // when skipping regarding earlier - // Clear the saved situation because the win/loss information will be incorrect. - // anyway, when the hash matches, it's likely that the previous phases also match - // Not worth writing out. - a_psv.clear(); - goto SKIP_SAVE; - } - hash[hash_index] = key; // Replace with the current key. - } - - // Temporary saving of the situation. - { - a_psv.emplace_back(PackedSfenValue()); - auto &psv = a_psv.back(); - - // If pack is requested, write the packed sfen and the evaluation value at that time. - // The final writing is after winning or losing. - pos.sfen_pack(psv.sfen); - - //{ - // std::string before_fen = pos.fen(); - // pos.set_from_packed_sfen(psv.sfen, &si, th); - // std::string after_fen = pos.fen(); - // assert(before_fen == after_fen); - //} - - // Get the value of evaluate() as seen from the root color on the leaf node of the PV line. - //I don't know the goodness and badness of using the return value of search() as it is. - psv.score = evaluate_leaf(pos, pv1); - psv.gamePly = ply; - - // Take out the first PV hand. This should be present unless depth 0. - assert(pv_value1.second.size() >= 1); - Move pv_move1 = pv_value1.second[0]; - psv.move = pv_move1; - } - - SKIP_SAVE:; - - // For some reason, I could not get PV (hit the substitution table etc. and got stuck?) so go to the next game. - // It's a rare case, so you can ignore it. - if (pv1.size() == 0) - break; - - // search_depth Advance the phase by hand reading. - m = pv1[0]; - } - - RANDOM_MOVE:; - - // Phase to randomly choose one from legal hands - if ( - // 1. 
Random move of random_move_count times from random_move_minply to random_move_maxply - (random_move_minply != -1 && ply <(int)random_move_flag.size() && random_move_flag[ply]) || - // 2. A mode to perform random move of random_move_count times after leaving the track - (random_move_minply == -1 && random_move_c list(pos); - - // I don't really know the goodness and badness of making this the Apery method. - if (random_move_like_apery == 0 - || prng.rand(random_move_like_apery) != 0 - ) - { - // Normally one move from legal move - m = list.at((size_t)prng.rand((uint64_t)list.size())); - } - else { - // if you can move the ball, move the ball - Move moves[8]; // Near 8 - Move* p = &moves[0]; - for (auto& m : list) - if (type_of(pos.moved_piece(m)) == KING) - *(p++) = m; - size_t n = p - &moves[0]; - if (n != 0) - { - // move to move the ball - m = moves[prng.rand(n)]; - - // In Apery method, at this time there is a 1/2 chance that the opponent will also move randomly - if (prng.rand(2) == 0) - { - // Is it a simple hack to add a "1" next to random_move_flag[ply]? - random_move_flag.insert(random_move_flag.begin() + ply + 1, 1, true); - } - } - else - // Normally one move from legal move - m = list.at((size_t)prng.rand((uint64_t)list.size())); - } - - // I put in the code of two handed balls, but if you choose one from legal hands, it should be equivalent to that - // I decided it's unnecessary because it just makes the code more complicated. - } - else { - // Since the logic becomes complicated, I'm sorry, I will search again with MultiPV here. - Learner::search(pos, random_multi_pv_depth, random_multi_pv); - // Select one from the top N hands of root Moves - - auto& rm = pos.this_thread()->rootMoves; - - uint64_t s = min((uint64_t)rm.size(), (uint64_t)random_multi_pv); - for (uint64_t i = 1; i < s; ++i) - { - // The difference from the evaluation value of rm[0] must be within the range of random_multi_pv_diff. 
- // It can be assumed that rm[x].score is arranged in descending order. - if (rm[0].score > rm[i].score + random_multi_pv_diff) - { - s = i; - break; - } - } - - m = rm[prng.rand(s)].pv[0]; - - // I haven't written one phase yet, but it ended, so the writing process ends and the next game starts. - if (!is_ok(m)) - break; - } - - // When trying to evaluate the move from the outcome of the game, - // There is a random move this time, so try not to fall below this. - a_psv.clear(); // clear saved aspect - } - - DO_MOVE:; - pos.do_move(m, states[ply]); - - // Call node evaluate() for each difference calculation. - Eval::evaluate_with_no_return(pos); - - } // for (int ply = 0; ; ++ply) - - } // while(!quit) - - sw.finalize(thread_id); -} - -// ----------------------------------- -// Command to generate a game record (master thread) -// ----------------------------------- - -// Command to generate a game record -void gen_sfen(Position&, istringstream& is) -{ - // number of threads (given by USI setoption) - uint32_t thread_num = (uint32_t)Options["Threads"]; - - // Number of generated game records default = 8 billion phases (Ponanza specification) - uint64_t loop_max = 8000000000UL; - - // Stop the generation when the evaluation value reaches this value. - int eval_limit = 3000; - - // search depth - int search_depth = 3; - int search_depth2 = INT_MIN; - - // Number of nodes to be searched. - uint64_t nodes = 0; - - // minimum ply, maximum ply and number of random moves - int random_move_minply = 1; - int random_move_maxply = 24; - int random_move_count = 5; - // A function to move the random move mainly like Apery - // If this is set to 3, the ball will move with a probability of 1/3. - int random_move_like_apery = 0; - // If you search with multipv instead of random move and choose from among them randomly, set random_multi_pv = 1 or more. 
- int random_multi_pv = 0; - int random_multi_pv_diff = 32000; - int random_multi_pv_depth = INT_MIN; - - // The minimum and maximum ply (number of steps from the initial phase) of the phase to write out. - int write_minply = 16; - int write_maxply = 400; - - // File name to write - string output_file_name = "generated_kifu.bin"; - - string token; - - // When hit to eval hash, as a evaluation value near the initial stage, if a hash collision occurs and a large value is written - // When eval_limit is set small, eval_limit will be exceeded every time in the initial phase, and phase generation will not proceed. - // Therefore, eval hash needs to be disabled. - // After that, when the hash of the eval hash collides, the evaluation value of a strange value is used, and it may be unpleasant to use it for the teacher. - bool use_eval_hash = false; - - // Save to file in this unit. - // File names are serialized like file_1.bin, file_2.bin. - uint64_t save_every = UINT64_MAX; - - // Add a random number to the end of the file name. - bool random_file_name = false; - - while (true) - { - token = ""; - is >> token; - if (token == "") - break; - - if (token == "depth") - is >> search_depth; - else if (token == "depth2") - is >> search_depth2; - else if (token == "nodes") - is >> nodes; - else if (token == "loop") - is >> loop_max; - else if (token == "output_file_name") - is >> output_file_name; - else if (token == "eval_limit") - { - is >> eval_limit; - // Limit the maximum to a one-stop score. 
(Otherwise you might not end the loop) - eval_limit = std::min(eval_limit, (int)mate_in(2)); - } - else if (token == "random_move_minply") - is >> random_move_minply; - else if (token == "random_move_maxply") - is >> random_move_maxply; - else if (token == "random_move_count") - is >> random_move_count; - else if (token == "random_move_like_apery") - is >> random_move_like_apery; - else if (token == "random_multi_pv") - is >> random_multi_pv; - else if (token == "random_multi_pv_diff") - is >> random_multi_pv_diff; - else if (token == "random_multi_pv_depth") - is >> random_multi_pv_depth; - else if (token == "write_minply") - is >> write_minply; - else if (token == "write_maxply") - is >> write_maxply; - else if (token == "use_eval_hash") - is >> use_eval_hash; - else if (token == "save_every") - is >> save_every; - else if (token == "random_file_name") - is >> random_file_name; - else if (token == "use_draw_in_training_data_generation") - is >> use_draw_in_training_data_generation; - else - cout << "Error! : Illegal token " << token << endl; - } - -#if defined(USE_GLOBAL_OPTIONS) - // Save it for later restore. - auto oldGlobalOptions = GlobalOptions; - GlobalOptions.use_eval_hash = use_eval_hash; -#endif - - // If search depth2 is not set, leave it the same as search depth. - if (search_depth2 == INT_MIN) - search_depth2 = search_depth; - if (random_multi_pv_depth == INT_MIN) - random_multi_pv_depth = search_depth; - - if (random_file_name) - { - // Give a random number to output_file_name at this point. - std::random_device seed_gen; - PRNG r(seed_gen()); - // Just in case, reassign the random numbers. - for(int i=0;i<10;++i) - r.rand(1); - auto to_hex = [](uint64_t u){ - std::stringstream ss; - ss << std::hex << u; - return ss.str(); - }; - // I don't want to wear 64bit numbers by accident, so I'm going to make a 64bit number 2 just in case. 
- output_file_name = output_file_name + "_" + to_hex(r.rand()) + to_hex(r.rand()); - } - - std::cout << "gensfen : " << endl - << " search_depth = " << search_depth << " to " << search_depth2 << endl - << " nodes = " << nodes << endl - << " loop_max = " << loop_max << endl - << " eval_limit = " << eval_limit << endl - << " thread_num (set by USI setoption) = " << thread_num << endl - << " book_moves (set by USI setoption) = " << Options["BookMoves"] << endl - << " random_move_minply = " << random_move_minply << endl - << " random_move_maxply = " << random_move_maxply << endl - << " random_move_count = " << random_move_count << endl - << " random_move_like_apery = " << random_move_like_apery << endl - << " random_multi_pv = " << random_multi_pv << endl - << " random_multi_pv_diff = " << random_multi_pv_diff << endl - << " random_multi_pv_depth = " << random_multi_pv_depth << endl - << " write_minply = " << write_minply << endl - << " write_maxply = " << write_maxply << endl - << " output_file_name = " << output_file_name << endl - << " use_eval_hash = " << use_eval_hash << endl - << " save_every = " << save_every << endl - << " random_file_name = " << random_file_name << endl; - - // Create and execute threads as many as Options["Threads"]. 
- { - SfenWriter sw(output_file_name, thread_num); - sw.save_every = save_every; - - MultiThinkGenSfen multi_think(search_depth, search_depth2, sw); - multi_think.nodes = nodes; - multi_think.set_loop_max(loop_max); - multi_think.eval_limit = eval_limit; - multi_think.random_move_minply = random_move_minply; - multi_think.random_move_maxply = random_move_maxply; - multi_think.random_move_count = random_move_count; - multi_think.random_move_like_apery = random_move_like_apery; - multi_think.random_multi_pv = random_multi_pv; - multi_think.random_multi_pv_diff = random_multi_pv_diff; - multi_think.random_multi_pv_depth = random_multi_pv_depth; - multi_think.write_minply = write_minply; - multi_think.write_maxply = write_maxply; - multi_think.start_file_write_worker(); - multi_think.go_think(); - - // Since we are joining with the destructor of SfenWriter, please give a message that it has finished after the join - // Enclose this in a block because it should be displayed. - } - - std::cout << "gensfen finished." << endl; - -#if defined(USE_GLOBAL_OPTIONS) - // Restore Global Options. 
- GlobalOptions = oldGlobalOptions; -#endif - -} - -// ----------------------------------- -// command to learn from the generated game (learn) -// ----------------------------------- - -// ordinary sigmoid function -double sigmoid(double x) -{ - return 1.0 / (1.0 + std::exp(-x)); -} - -// A function that converts the evaluation value to the winning rate [0,1] -double winning_percentage(double value) -{ - // 1/(1+10^(-Eval/4)) - // = 1/(1+e^(-Eval/4*ln(10)) - // = sigmoid(Eval/4*ln(10)) - return sigmoid(value / PawnValueEg / 4.0 * log(10.0)); -} -double dsigmoid(double x) -{ - // Sigmoid function - // f(x) = 1/(1+exp(-x)) - // the first derivative is - // f'(x) = df/dx = f(x)・{ 1-f(x)} - // becomes - - return sigmoid(x) * (1.0 - sigmoid(x)); -} - -// When the objective function is the sum of squares of the difference in winning percentage -#if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE) -// function to calculate the gradient -double calc_grad(Value deep, Value shallow, PackedSfenValue& psv) -{ - // The square of the win rate difference minimizes it in the objective function. - // Objective function J = 1/2m Σ (win_rate(shallow)-win_rate(deep) )^2 - // However, σ is a sigmoid function that converts the evaluation value into the difference in the winning percentage. - // m is the number of samples. shallow is the evaluation value for a shallow search (qsearch()). deep is the evaluation value for deep search. - // If W is the feature vector (parameter of the evaluation function) and Xi and Yi are teachers - // shallow = W*Xi // * is the Hadamard product, transposing W and meaning X - // f(Xi) = win_rate(W*Xi) - // If σ(i th deep) = Yi, - // J = m/2 Σ (f(Xi)-Yi )^2 - // becomes a common expression. - // W is a vector, and if we write the jth element as Wj, from the chain rule - // ∂J/∂Wj = ∂J/∂f ・∂f/∂W ・∂W/∂Wj - // = 1/m Σ (f(Xi)-y) ・f'(Xi) ・ 1 - - // 1/m will be multiplied later, but the contents of Σ can be retained in the array as the value of the gradient. 
- // f'(Xi) = win_rate'(shallow) = sigmoid'(shallow/600) = dsigmoid(shallow / 600) / 600 - // This /600 at the end is adjusted by the learning rate, so do not write it.. - // Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad. - // Therefore, it is not necessary to save it in memory. - - double p = winning_percentage(deep); - double q = winning_percentage(shallow); - return (q - p) * dsigmoid(double(shallow) / 600.0); -} -#endif - -#if defined (LOSS_FUNCTION_IS_CROSS_ENTOROPY) -double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) -{ - // Objective function with cross entropy - - // For the concept and nature of cross entropy, - // http://nnadl-ja.github.io/nnadl_site_ja/chap3.html#the_cross-entropy_cost_function - // http://postd.cc/visual-information-theory-3/ - // Refer to etc. - - // Objective function design) - // We want to make the distribution of p closer to the distribution of q → Think of it as the problem of minimizing the cross entropy between the probability distributions of p and q. - // J = H(p,q) =-Σ p(x) log(q(x)) = -p log q-(1-p) log(1-q) - // x - - // p is a constant and q is a Wi function (q = σ(W・Xi) ). - // ∂J/∂Wi = -p・q'/q-(1-p)(1-q)'/(1-q) - // = ... - // = q-p. - - double p = winning_percentage(deep); - double q = winning_percentage(shallow); - - return q - p; -} -#endif - -#if defined ( LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE ) -double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) -{ - // Version that does not pass the winning percentage function - // This, unless EVAL_LIMIT is set low, trying to match the evaluation value with the shape of the end stage - // eval may exceed the range of eval. - return shallow - deep; -} -#endif - -#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) - -// A constant used in elmo (WCSC27). Adjustment required. 
-// Since elmo does not internally divide the expression, the value is different. -// You can set this value with the learn command. -// 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27) -double ELMO_LAMBDA = 0.33; -double ELMO_LAMBDA2 = 0.33; -double ELMO_LAMBDA_LIMIT = 32000; - -double calc_grad(Value deep, Value shallow , const PackedSfenValue& psv) -{ - // elmo (WCSC27) method - // Correct with the actual game wins and losses. - - const double q = winning_percentage(shallow); - const double p = winning_percentage(deep); - - // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw. - // game_result = 1,0,-1 so add 1 and divide by 2. - const double t = double(psv.game_result + 1) / 2; - - // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. - const double lambda = (abs(deep) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA; - - // Use the actual win rate as a correction term. - // This is the idea of ​​elmo (WCSC27), modern O-parts. - const double grad = lambda * (q - p) + (1.0 - lambda) * (q - t); - - return grad; -} - -// Calculate cross entropy during learning -// The individual cross entropy of the win/loss term and win rate term of the elmo expression is returned to the arguments cross_entropy_eval and cross_entropy_win. -void calc_cross_entropy(Value deep, Value shallow, const PackedSfenValue& psv, - double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy, - double& entropy_eval, double& entropy_win, double& entropy) -{ - const double p /* teacher_winrate */ = winning_percentage(deep); - const double q /* eval_winrate */ = winning_percentage(shallow); - const double t = double(psv.game_result + 1) / 2; - - constexpr double epsilon = 0.000001; - - // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. - const double lambda = (abs(deep) >= ELMO_LAMBDA_LIMIT) ? 
ELMO_LAMBDA2 : ELMO_LAMBDA; - - const double m = (1.0 - lambda) * t + lambda * p; - - cross_entropy_eval = - (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon)); - cross_entropy_win = - (-t * std::log(q + epsilon) - (1.0 - t) * std::log(1.0 - q + epsilon)); - entropy_eval = - (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon)); - entropy_win = - (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon)); - - cross_entropy = - (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon)); - entropy = - (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon)); -} - -#endif - - -// Other variations may be prepared as the objective function.. - - -double calc_grad(Value shallow, const PackedSfenValue& psv) { - return calc_grad((Value)psv.score, shallow, psv); -} - -// Sfen reader -struct SfenReader -{ - SfenReader(int thread_num) : prng((std::random_device())()) - { - packed_sfens.resize(thread_num); - total_read = 0; - total_done = 0; - last_done = 0; - next_update_weights = 0; - save_count = 0; - end_of_files = false; - no_shuffle = false; - stop_flag = false; - - hash.resize(READ_SFEN_HASH_SIZE); - } - - ~SfenReader() - { - if (file_worker_thread.joinable()) - file_worker_thread.join(); - - for (auto p : packed_sfens) - delete p; - for (auto p : packed_sfens_pool) - delete p; - } - - // number of phases used for calculation such as mse - // mini-batch size = 1M is standard, so 0.2% of that should be negligible in terms of time. - //Since search() is performed with depth = 1 in calculation of move match rate, simple comparison is not possible... - const uint64_t sfen_for_mse_size = 2000; - - // Load the phase for calculation such as mse. - void read_for_mse() - { - auto th = Threads.main(); - Position& pos = th->rootPos; - for (uint64_t i = 0; i < sfen_for_mse_size; ++i) - { - PackedSfenValue ps; - if (!read_to_thread_buffer(0, ps)) - { - cout << "Error! read packed sfen , failed." 
<< endl; - break; - } - sfen_for_mse.push_back(ps); - - // Get the hash key. - StateInfo si; - pos.set_from_packed_sfen(ps.sfen,&si,th); - sfen_for_mse_hash.insert(pos.key()); - } - } - - void read_validation_set(const string file_name, int eval_limit) - { - ifstream fs(file_name, ios::binary); - - while (fs) - { - PackedSfenValue p; - if (fs.read((char*)&p, sizeof(PackedSfenValue))) - { - if (eval_limit < abs(p.score)) - continue; - if (!use_draw_in_validation && p.game_result == 0) - continue; - sfen_for_mse.push_back(p); - } else { - break; - } - } - } - - // Number of phases buffered by each thread 0.1M phases. 4M phase at 40HT - const size_t THREAD_BUFFER_SIZE = 10 * 1000; - - // Buffer for reading files (If this is made larger, the shuffle becomes larger and the phases may vary. - // If it is too large, the memory consumption will increase. - // SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE. - const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE; - - // [ASYNC] Thread returns one aspect. Otherwise returns false. - bool read_to_thread_buffer(size_t thread_id, PackedSfenValue& ps) - { - // If there are any positions left in the thread buffer, retrieve one and return it. - auto& thread_ps = packed_sfens[thread_id]; - - // Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish. - if ((thread_ps == nullptr || thread_ps->size() == 0) // If the buffer is empty, fill it. - && !read_to_thread_buffer_impl(thread_id)) - return false; - - // read_to_thread_buffer_impl() returned true, - // Since the filling of the thread buffer with the phase has been completed successfully - // thread_ps->rbegin() is alive. - - ps = *(thread_ps->rbegin()); - thread_ps->pop_back(); - - // If you've run out of buffers, call delete yourself to free this buffer. - if (thread_ps->size() == 0) - { - - delete thread_ps; - thread_ps = nullptr; - } - - return true; - } - - // [ASYNC] Read some aspects into thread buffer. 
- bool read_to_thread_buffer_impl(size_t thread_id) - { - while (true) - { - { - std::unique_lock lk(mutex); - // If you can fill from the file buffer, that's fine. - if (packed_sfens_pool.size() != 0) - { - // It seems that filling is possible, so fill and finish. - - packed_sfens[thread_id] = packed_sfens_pool.front(); - packed_sfens_pool.pop_front(); - - total_read += THREAD_BUFFER_SIZE; - - return true; - } - } - - // The file to read is already gone. No more use. - if (end_of_files) - return false; - - // Waiting for file worker to fill packed_sfens_pool. - // The mutex isn't locked, so it should fill up soon. - sleep(1); - } - - } - - // Start a thread that loads the phase file in the background. - void start_file_read_worker() - { - file_worker_thread = std::thread([&] { this->file_read_worker(); }); - } - - // for file read-only threads - void file_read_worker() - { - auto open_next_file = [&]() - { - if (fs.is_open()) - fs.close(); - - // no more - if (filenames.size() == 0) - return false; - - // Get the next file name. - string filename = *filenames.rbegin(); - filenames.pop_back(); - - fs.open(filename, ios::in | ios::binary); - cout << "open filename = " << filename << endl; - assert(fs); - - return true; - }; - - while (true) - { - // Wait for the buffer to run out. - // This size() is read only, so you don't need to lock it. - while (!stop_flag && packed_sfens_pool.size() >= SFEN_READ_SIZE / THREAD_BUFFER_SIZE) - sleep(100); - if (stop_flag) - return; - - PSVector sfens; - sfens.reserve(SFEN_READ_SIZE); - - // Read from the file into the file buffer. - while (sfens.size() < SFEN_READ_SIZE) - { - PackedSfenValue p; - if (fs.read((char*)&p, sizeof(PackedSfenValue))) - { - sfens.push_back(p); - } else - { - // read failure - if (!open_next_file()) - { - // There was no next file. Abon. - cout << "..end of files." << endl; - end_of_files = true; - return; - } - } - } - - // Shuffle the read phase data. 
- // random shuffle by Fisher-Yates algorithm - - if (!no_shuffle) - { - auto size = sfens.size(); - for (size_t i = 0; i < size; ++i) - swap(sfens[i], sfens[(size_t)(prng.rand((uint64_t)size - i) + i)]); - } - - // Divide this by THREAD_BUFFER_SIZE. There should be size pieces. - // SFEN_READ_SIZE shall be a multiple of THREAD_BUFFER_SIZE. - assert((SFEN_READ_SIZE % THREAD_BUFFER_SIZE)==0); - - auto size = size_t(SFEN_READ_SIZE / THREAD_BUFFER_SIZE); - std::vector ptrs; - ptrs.reserve(size); - - for (size_t i = 0; i < size; ++i) - { - // Delete this pointer on the receiving side. - PSVector* ptr = new PSVector(); - ptr->resize(THREAD_BUFFER_SIZE); - memcpy(&((*ptr)[0]), &sfens[i * THREAD_BUFFER_SIZE], sizeof(PackedSfenValue) * THREAD_BUFFER_SIZE); - - ptrs.push_back(ptr); - } - - // Since sfens is ready, look at the occasion and copy - { - std::unique_lock lk(mutex); - - // You can ignore this time because you just copy the pointer... - // The mutex lock is required because the contents of packed_sfens_pool are changed. - - for (size_t i = 0; i < size; ++i) - packed_sfens_pool.push_back(ptrs[i]); - } - } - } - - // sfen files - vector filenames; - - // number of phases read (file to memory buffer) - atomic total_read; - - // number of processed phases - atomic total_done; - - // number of cases processed so far - uint64_t last_done; - - // If total_read exceeds this value, update_weights() and calculate mse. - uint64_t next_update_weights; - - uint64_t save_count; - - // Do not shuffle when reading the phase. - bool no_shuffle; - - bool stop_flag; - - // Determine if it is a phase for calculating rmse. - // (The computational aspects of rmse should not be used for learning.) - bool is_for_rmse(Key key) const - { - return sfen_for_mse_hash.count(key) != 0; - } - - // hash to limit the reading of the same situation - // Is there too many 64 million phases? Or Not really.. - // It must be 2**N because it will be used as the mask to calculate hash_index. 
- static const uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024; - vector hash; // 64MB*8 = 512MB - - // test phase for mse calculation - PSVector sfen_for_mse; - -protected: - - // worker thread reading file in background - std::thread file_worker_thread; - - // Random number to shuffle when reading the phase - PRNG prng; - - // Did you read the files and reached the end? - atomic end_of_files; - - - // handle of sfen file - std::fstream fs; - - // sfen for each thread - // (When the thread is used up, the thread should call delete to release it.) - std::vector packed_sfens; - - // Mutex when accessing packed_sfens_pool - std::mutex mutex; - - // pool of sfen. The worker thread read from the file is added here. - // Each worker thread fills its own packed_sfens[thread_id] from here. - // * Lock and access the mutex. - std::list packed_sfens_pool; - - // Hold the hash key so that the mse calculation phase is not used for learning. - std::unordered_set sfen_for_mse_hash; -}; - -// Class to generate sfen with multiple threads -struct LearnerThink: public MultiThink -{ - LearnerThink(SfenReader& sr_):sr(sr_),stop_flag(false), save_only_once(false) - { -#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) - learn_sum_cross_entropy_eval = 0.0; - learn_sum_cross_entropy_win = 0.0; - learn_sum_cross_entropy = 0.0; - learn_sum_entropy_eval = 0.0; - learn_sum_entropy_win = 0.0; - learn_sum_entropy = 0.0; -#endif -#if defined(EVAL_NNUE) - newbob_scale = 1.0; - newbob_decay = 1.0; - newbob_num_trials = 2; - best_loss = std::numeric_limits::infinity(); - latest_loss_sum = 0.0; - latest_loss_count = 0; -#endif - } - - virtual void thread_worker(size_t thread_id); - - // Start a thread that loads the phase file in the background. 
- void start_file_read_worker() { sr.start_file_read_worker(); } - - // save merit function parameters to a file - bool save(bool is_final=false); - - // sfen reader - SfenReader& sr; - - // Learning iteration counter - uint64_t epoch = 0; - - // Mini batch size size. Be sure to set it on the side that uses this class. - uint64_t mini_batch_size = 1000*1000; - - bool stop_flag; - - // Discount rate - double discount_rate; - - // Option to exclude early stage from learning - int reduction_gameply; - - // Option not to learn kk/kkp/kpp/kppp - std::array freeze; - - // If the absolute value of the evaluation value of the deep search of the teacher phase exceeds this value, discard the teacher phase. - int eval_limit; - - // Flag whether to dig a folder each time the evaluation function is saved. - // If true, do not dig the folder. - bool save_only_once; - - // --- loss calculation - -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - // For calculation of learning data loss - atomic learn_sum_cross_entropy_eval; - atomic learn_sum_cross_entropy_win; - atomic learn_sum_cross_entropy; - atomic learn_sum_entropy_eval; - atomic learn_sum_entropy_win; - atomic learn_sum_entropy; -#endif - -#if defined(EVAL_NNUE) - shared_timed_mutex nn_mutex; - double newbob_scale; - double newbob_decay; - int newbob_num_trials; - double best_loss; - double latest_loss_sum; - uint64_t latest_loss_count; - std::string best_nn_directory; -#endif - - uint64_t eval_save_interval; - uint64_t loss_output_interval; - uint64_t mirror_percentage; - - // Loss calculation. - // done: Number of phases targeted this time - void calc_loss(size_t thread_id , uint64_t done); - - // Define the loss calculation in ↑ as a task and execute it - TaskDispatcher task_dispatcher; -}; - -void LearnerThink::calc_loss(size_t thread_id, uint64_t done) -{ - // There is no point in hitting the replacement table, so at this timing the generation of the replacement table is updated. 
- // It doesn't matter if you have disabled the substitution table. - TT.new_search(); - - -#if defined(EVAL_NNUE) - std::cout << "PROGRESS: " << now_string() << ", "; - std::cout << sr.total_done << " sfens"; - std::cout << ", iteration " << epoch; - std::cout << ", eta = " << Eval::get_eta() << ", "; -#endif - -#if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - double sum_error = 0; - double sum_error2 = 0; - double sum_error3 = 0; -#endif - -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - // For calculation of verification data loss - atomic test_sum_cross_entropy_eval,test_sum_cross_entropy_win,test_sum_cross_entropy; - atomic test_sum_entropy_eval,test_sum_entropy_win,test_sum_entropy; - test_sum_cross_entropy_eval = 0; - test_sum_cross_entropy_win = 0; - test_sum_cross_entropy = 0; - test_sum_entropy_eval = 0; - test_sum_entropy_win = 0; - test_sum_entropy = 0; - - // norm for learning - atomic sum_norm; - sum_norm = 0; -#endif - - // The number of times the pv first move of deep search matches the pv first move of search(1). - atomic move_accord_count; - move_accord_count = 0; - - // Display the value of eval() in the initial stage of Hirate and see the shaking. - auto th = Threads[thread_id]; - auto& pos = th->rootPos; - StateInfo si; - pos.set(StartFEN, false, &si, th); - std::cout << "hirate eval = " << Eval::evaluate(pos); - - //Eval::print_eval_stat(pos); - - // It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished. - // I created a mechanism to call task, so I will use it. - - // The number of tasks to do. - atomic task_count; - task_count = (int)sr.sfen_for_mse.size(); - task_dispatcher.task_reserve(task_count); - - // Create a task to search for the situation and give it to each thread. - for (const auto& ps : sr.sfen_for_mse) - { - // Assign work to each thread using TaskDispatcher. - // A task definition for that. 
- // It is not possible to capture pos used in ↑, so specify the variables you want to capture one by one. - auto task = [&ps,&test_sum_cross_entropy_eval,&test_sum_cross_entropy_win,&test_sum_cross_entropy,&test_sum_entropy_eval,&test_sum_entropy_win,&test_sum_entropy, &sum_norm,&task_count ,&move_accord_count](size_t thread_id) - { - // Does C++ properly capture a new ps instance for each loop?. - auto th = Threads[thread_id]; - auto& pos = th->rootPos; - StateInfo si; - if (pos.set_from_packed_sfen(ps.sfen ,&si, th) != 0) - { - // Unfortunately, as an sfen for rmse calculation, an invalid sfen was drawn. - cout << "Error! : illegal packed sfen " << pos.fen() << endl; - } - - // Evaluation value for shallow search - // The value of evaluate() may be used, but when calculating loss, learn_cross_entropy and - // Use qsearch() because it is difficult to compare the values. - // EvalHash has been disabled in advance. (If not, the same value will be returned every time) - auto r = qsearch(pos); - - auto shallow_value = r.first; - { - const auto rootColor = pos.side_to_move(); - const auto pv = r.second; - std::vector> states(pv.size()); - for (size_t i = 0; i < pv.size(); ++i) - { - pos.do_move(pv[i], states[i]); - Eval::evaluate_with_no_return(pos); - } - shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos); - for (auto it = pv.rbegin(); it != pv.rend(); ++it) - pos.undo_move(*it); - } - - // Evaluation value of deep search - auto deep_value = (Value)ps.score; - - // Note) This code does not consider when eval_limit is specified in the learn command. 
- - // --- error calculation - -#if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - auto grad = calc_grad(deep_value, shallow_value, ps); - - // something like rmse - sum_error += grad*grad; - // Add the absolute value of the gradient - sum_error2 += abs(grad); - // Add the absolute value of the difference between the evaluation values - sum_error3 += abs(shallow_value - deep_value); -#endif - - // --- calculation of cross entropy - - // For the time being, regarding the win rate and loss terms only in the elmo method - // Calculate and display the cross entropy. - -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy; - double test_entropy_eval, test_entropy_win, test_entropy; - calc_cross_entropy(deep_value, shallow_value, ps, test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy, test_entropy_eval, test_entropy_win, test_entropy); - // The total cross entropy need not be abs() by definition. - test_sum_cross_entropy_eval += test_cross_entropy_eval; - test_sum_cross_entropy_win += test_cross_entropy_win; - test_sum_cross_entropy += test_cross_entropy; - test_sum_entropy_eval += test_entropy_eval; - test_sum_entropy_win += test_entropy_win; - test_sum_entropy += test_entropy; - sum_norm += (double)abs(shallow_value); -#endif - - // Determine if the teacher's move and the score of the shallow search match - { - auto r = search(pos,1); - if ((uint16_t)r.second[0] == ps.move) - move_accord_count.fetch_add(1, std::memory_order_relaxed); - } - - // Reduced one task because I did it - --task_count; - }; - - // Throw the defined task to slave. 
- task_dispatcher.push_task_async(task); - } - - // join yourself as a slave - task_dispatcher.on_idle(thread_id); - - // wait for all tasks to complete - while (task_count) - sleep(1); - -#if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - // rmse = root mean square error: mean square error - // mae = mean absolute error: mean absolute error - auto dsig_rmse = std::sqrt(sum_error / (sfen_for_mse.size() + epsilon)); - auto dsig_mae = sum_error2 / (sfen_for_mse.size() + epsilon); - auto eval_mae = sum_error3 / (sfen_for_mse.size() + epsilon); - cout << " , dsig rmse = " << dsig_rmse << " , dsig mae = " << dsig_mae - << " , eval mae = " << eval_mae; -#endif - -#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) -#if defined(EVAL_NNUE) - latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; - latest_loss_count += sr.sfen_for_mse.size(); -#endif - -// learn_cross_entropy may be called train cross entropy in the world of machine learning, -// When omitting the acronym, it is nice to be able to distinguish it from test cross entropy(tce) by writing it as lce. 
- - if (sr.sfen_for_mse.size() && done) - { - cout - << " , test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size() - << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size() - << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size() - << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size() - << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size() - << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size() - << " , norm = " << sum_norm - << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"; - if (done != static_cast(-1)) - { - cout - << " , learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done - << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done - << " , learn_entropy_eval = " << learn_sum_entropy_eval / done - << " , learn_entropy_win = " << learn_sum_entropy_win / done - << " , learn_cross_entropy = " << learn_sum_cross_entropy / done - << " , learn_entropy = " << learn_sum_entropy / done; - } - cout << endl; - } - else { - cout << "Error! : sr.sfen_for_mse.size() = " << sr.sfen_for_mse.size() << " , done = " << done << endl; - } - - // Clear 0 for next time. - learn_sum_cross_entropy_eval = 0.0; - learn_sum_cross_entropy_win = 0.0; - learn_sum_cross_entropy = 0.0; - learn_sum_entropy_eval = 0.0; - learn_sum_entropy_win = 0.0; - learn_sum_entropy = 0.0; -#else - << endl; -#endif -} - - -void LearnerThink::thread_worker(size_t thread_id) -{ -#if defined(_OPENMP) - omp_set_num_threads((int)Options["Threads"]); -#endif - - auto th = Threads[thread_id]; - auto& pos = th->rootPos; - - while (true) - { - // display mse (this is sometimes done only for thread 0) - // Immediately after being read from the file... - -#if defined(EVAL_NNUE) - // Lock the evaluation function so that it is not used during updating. 
- shared_lock read_lock(nn_mutex, defer_lock); - if (sr.next_update_weights <= sr.total_done || - (thread_id != 0 && !read_lock.try_lock())) -#else - if (sr.next_update_weights <= sr.total_done) -#endif - { - if (thread_id != 0) - { - // Wait except thread_id == 0. - - if (stop_flag) - break; - - // I want to parallelize rmse calculation etc., so if task() is loaded, process it. - task_dispatcher.on_idle(thread_id); - continue; - } - else - { - // Only thread_id == 0 performs the following update process. - - // The weight array is not updated for the first time. - if (sr.next_update_weights == 0) - { - sr.next_update_weights += mini_batch_size; - continue; - } - -#if !defined(EVAL_NNUE) - // Output the current time. Output every time. - std::cout << sr.total_done << " sfens , at " << now_string() << std::endl; - - // Reflect the gradient in the weight array at this timing. The calculation of the gradient is just right for each 1M phase in terms of mini-batch. - Eval::update_weights(epoch , freeze); - - // Display epoch and current eta for debugging. - std::cout << "epoch = " << epoch << " , eta = " << Eval::get_eta() << std::endl; -#else - { - // update parameters - - // Lock the evaluation function so that it is not used during updating. - lock_guard write_lock(nn_mutex); - Eval::NNUE::UpdateParameters(epoch); - } -#endif - ++epoch; - - // Save once every 1 billion phases. - - // However, the elapsed time during update_weights() and calc_rmse() is ignored. - if (++sr.save_count * mini_batch_size >= eval_save_interval) - { - sr.save_count = 0; - - // During this time, as the gradient calculation proceeds, the value becomes too large and I feel annoyed, so stop other threads. - const bool converged = save(); - if (converged) - { - stop_flag = true; - sr.stop_flag = true; - break; - } - } - - // Calculate rmse. This is done for samples of 10,000 phases. - // If you do with 40 cores, update_weights every 1 million phases - // I don't think it's so good to be tiring. 
- static uint64_t loss_output_count = 0; - if (++loss_output_count * mini_batch_size >= loss_output_interval) - { - loss_output_count = 0; - - // Number of cases processed this time - uint64_t done = sr.total_done - sr.last_done; - - // loss calculation - calc_loss(thread_id , done); - -#if defined(EVAL_NNUE) - Eval::NNUE::CheckHealth(); -#endif - - // Make a note of how far you have totaled. - sr.last_done = sr.total_done; - } - - // Next time, I want you to do this series of processing again when you process only mini_batch_size. - sr.next_update_weights += mini_batch_size; - - // Since I was waiting for the update of this sr.next_update_weights except the main thread, - // Once this value is updated, it will start moving again. - } - } - - PackedSfenValue ps; - RetryRead:; - if (!sr.read_to_thread_buffer(thread_id, ps)) - { - // ran out of thread pool for my thread. - // Because there are almost no phases left, - // Terminate all other threads. - - stop_flag = true; - break; - } - - // The evaluation value exceeds the learning target value. - // Ignore this aspect information. 
- if (eval_limit 0) { - static int trials = newbob_num_trials; - const double latest_loss = latest_loss_sum / latest_loss_count; - latest_loss_sum = 0.0; - latest_loss_count = 0; - cout << "loss: " << latest_loss; - if (latest_loss < best_loss) { - cout << " < best (" << best_loss << "), accepted" << endl; - best_loss = latest_loss; - best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name); - trials = newbob_num_trials; - } else { - cout << " >= best (" << best_loss << "), rejected" << endl; - if (best_nn_directory.empty()) { - cout << "WARNING: no improvement from initial model" << endl; - } else { - cout << "restoring parameters from " << best_nn_directory << endl; - Eval::NNUE::RestoreParameters(best_nn_directory); - } - if (--trials > 0 && !is_final) { - cout << "reducing learning rate scale from " << newbob_scale - << " to " << (newbob_scale * newbob_decay) - << " (" << trials << " more trials)" << endl; - newbob_scale *= newbob_decay; - Eval::NNUE::SetGlobalLearningRateScale(newbob_scale); - } - } - if (trials == 0) { - cout << "converged" << endl; - return true; - } - } -#endif - } - return false; -} - -// Shuffle_files(), shuffle_files_quick() subcontracting, writing part. -// output_file_name: Name of the file to write -// prng: random number -// afs: fstream of each teacher phase file -// a_count: The number of teacher positions inherent in each file. -void shuffle_write(const string& output_file_name , PRNG& prng , vector& afs , vector& a_count) -{ - uint64_t total_sfen_count = 0; - for (auto c : a_count) - total_sfen_count += c; - - // number of exported phases - uint64_t write_sfen_count = 0; - - // Output the progress on the screen for each phase. 
- const uint64_t buffer_size = 10000000; - - auto print_status = [&]() - { - // Output progress every 10M phase or when all writing is completed - if (((write_sfen_count % buffer_size) == 0) || - (write_sfen_count == total_sfen_count)) - cout << write_sfen_count << " / " << total_sfen_count << endl; - }; - - - cout << endl << "write : " << output_file_name << endl; - - fstream fs(output_file_name, ios::out | ios::binary); - - // total teacher positions - uint64_t sum = 0; - for (auto c : a_count) - sum += c; - - while (sum != 0) - { - auto r = prng.rand(sum); - - // Aspects stored in fs[0] file ... Aspects stored in fs[1] file ... - //Think of it as a series like, and determine in which file r is pointing. - // The contents of the file are shuffled, so you can take the next element from that file. - // Each file has a_count[x] phases, so this process can be written as follows. - - uint64_t n = 0; - while (a_count[n] <= r) - r -= a_count[n++]; - - // This confirms n. Before you forget it, reduce the remaining number. - - --a_count[n]; - --sum; - - PackedSfenValue psv; - // It's better to read and write all at once until the performance is not so good... - if (afs[n].read((char*)&psv, sizeof(PackedSfenValue))) - { - fs.write((char*)&psv, sizeof(PackedSfenValue)); - ++write_sfen_count; - print_status(); - } - } - print_status(); - fs.close(); - cout << "done!" << endl; -} - -// Subcontracting the teacher shuffle "learn shuffle" command. -// output_file_name: name of the output file where the shuffled teacher positions will be written -void shuffle_files(const vector& filenames , const string& output_file_name , uint64_t buffer_size ) -{ - // The destination folder is - // tmp/ for temporary writing - - // Temporary file is written to tmp/ folder for each buffer_size phase. - // For example, if buffer_size = 20M, you need a buffer of 20M*40bytes = 800MB. - // In a PC with a small memory, it would be better to reduce this. 
- // However, if the number of files increases too much, it will not be possible to open at the same time due to OS restrictions. - // There should have been a limit of 512 per process on Windows, so you can open here as 500, - // The current setting is 500 files x 20M = 10G = 10 billion phases. - - PSVector buf; - buf.resize(buffer_size); - // ↑ buffer, a marker that indicates how much you have used - uint64_t buf_write_marker = 0; - - // File name to write (incremental counter because it is a serial number) - uint64_t write_file_count = 0; - - // random number to shuffle - PRNG prng((std::random_device())()); - - // generate the name of the temporary file - auto make_filename = [](uint64_t i) - { - return "tmp/" + to_string(i) + ".bin"; - }; - - // Exported files in tmp/ folder, number of teacher positions stored in each - vector a_count; - - auto write_buffer = [&](uint64_t size) - { - // shuffle from buf[0] to buf[size-1] - for (uint64_t i = 0; i < size; ++i) - swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); - - // write to a file - fstream fs; - fs.open(make_filename(write_file_count++), ios::out | ios::binary); - fs.write((char*)&buf[0], size * sizeof(PackedSfenValue)); - fs.close(); - a_count.push_back(size); - - buf_write_marker = 0; - cout << "."; - }; - - Dependency::mkdir("tmp"); - - // Shuffle and export as a 10M phase shredded file. - for (auto filename : filenames) - { - fstream fs(filename, ios::in | ios::binary); - cout << endl << "open file = " << filename; - while (fs.read((char*)&buf[buf_write_marker], sizeof(PackedSfenValue))) - if (++buf_write_marker == buffer_size) - write_buffer(buffer_size); - - // Read in units of sizeof(PackedSfenValue), - // Ignore the last remaining fraction. (Fails in fs.read, so exit while) - // (The remaining fraction seems to be half-finished data that was created because it was stopped halfway during teacher generation.) 
- - } - - if (buf_write_marker != 0) - write_buffer(buf_write_marker); - - // Only shuffled files have been written write_file_count. - // As a second pass, if you open all of them at the same time, select one at random and load one phase at a time - // Now you have shuffled. - - // Original file for shirt full + tmp file + file to write requires 3 times the storage capacity of the original file. - // 1 billion SSD is not enough for shuffling because it is 400GB for 10 billion phases. - // If you want to delete (or delete by hand) the original file at this point after writing to tmp, - // The storage capacity is about twice that of the original file. - // So, maybe we should have an option to delete the original file. - - // Files are opened at the same time. It is highly possible that this will exceed FOPEN_MAX. - // In that case, rather than adjusting buffer_size to reduce the number of files. - - vector afs; - for (uint64_t i = 0; i < write_file_count; ++i) - afs.emplace_back(fstream(make_filename(i),ios::in | ios::binary)); - - // Throw to the subcontract function and end. - shuffle_write(output_file_name, prng, afs, a_count); -} - -// Subcontracting the teacher shuffle "learn shuffleq" command. -// This is written in 1 pass. -// output_file_name: name of the output file where the shuffled teacher positions will be written -void shuffle_files_quick(const vector& filenames, const string& output_file_name) -{ - // number of phases read - uint64_t read_sfen_count = 0; - - // random number to shuffle - PRNG prng((std::random_device())()); - - // number of files - size_t file_count = filenames.size(); - - // Number of teacher positions stored in each file in filenames - vector a_count(file_count); - - // Count the number of teacher aspects in each file. 
- vector afs(file_count); - - for (size_t i = 0; i & filenames,const string output_file_name) -{ - PSVector buf; - - for (auto filename : filenames) - { - std::cout << "read : " << filename << std::endl; - read_file_to_memory(filename, [&buf](uint64_t size) { - assert((size % sizeof(PackedSfenValue)) == 0); - // Expand the buffer and read after the last end. - uint64_t last = buf.size(); - buf.resize(last + size / sizeof(PackedSfenValue)); - return (void*)&buf[last]; - }); - } - - // shuffle from buf[0] to buf[size-1] - PRNG prng((std::random_device())()); - uint64_t size = (uint64_t)buf.size(); - std::cout << "shuffle buf.size() = " << size << std::endl; - for (uint64_t i = 0; i < size; ++i) - swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); - - std::cout << "write : " << output_file_name << endl; - - // If the file to be written exceeds 2GB, it cannot be written in one shot with fstream::write, so use wrapper. - write_memory_to_file(output_file_name, (void*)&buf[0], (uint64_t)sizeof(PackedSfenValue)*(uint64_t)buf.size()); - - std::cout << "..shuffle_on_memory done." << std::endl; -} - -void convert_bin(const vector& filenames, const string& output_file_name, const int ply_minimum, const int ply_maximum, const int interpolate_eval) -{ - std::fstream fs; - uint64_t data_size=0; - uint64_t filtered_size = 0; - auto th = Threads.main(); - auto &tpos = th->rootPos; - // convert plain rag to packed sfenvalue for Yaneura king - fs.open(output_file_name, ios::app | ios::binary); - StateListPtr states; - for (auto filename : filenames) { - std::cout << "convert " << filename << " ... "; - std::string line; - ifstream ifs; - ifs.open(filename); - PackedSfenValue p; - data_size = 0; - filtered_size = 0; - p.gamePly = 1; // Not included in apery format. 
Should be initialized - bool ignore_flag = false; - while (std::getline(ifs, line)) { - std::stringstream ss(line); - std::string token; - std::string value; - ss >> token; - if (token == "fen") { - states = StateListPtr(new std::deque(1)); // Drop old and create a new one - tpos.set(line.substr(4), false, &states->back(), Threads.main()); - tpos.sfen_pack(p.sfen); - } - else if (token == "move") { - ss >> value; - p.move = UCI::to_move(tpos, value); - } - else if (token == "score") { - ss >> p.score; - } - else if (token == "ply") { - int temp; - ss >> temp; - if(temp < ply_minimum || temp > ply_maximum){ - ignore_flag = true; - } - p.gamePly = uint16_t(temp); // No cast here? - if (interpolate_eval != 0){ - p.score = min(3000, interpolate_eval * temp); - } - } - else if (token == "result") { - int temp; - ss >> temp; - p.game_result = int8_t(temp); // Do you need a cast here? - if (interpolate_eval){ - p.score = p.score * p.game_result; - } - } - else if (token == "e") { - if(!ignore_flag){ - fs.write((char*)&p, sizeof(PackedSfenValue)); - data_size+=1; - // debug - // std::cout< 0.25 * PawnValueEg -// #-4 --> -mate_in(4) -// #3 --> mate_in(3) -// -M4 --> -mate_in(4) -// +M3 --> mate_in(3) -Value parse_score_from_pgn_extract(std::string eval, bool& success) { - success = true; - - if (eval.substr(0, 1) == "#") { - if (eval.substr(1, 1) == "-") { - return -mate_in(stoi(eval.substr(2, eval.length() - 2))); - } - else { - return mate_in(stoi(eval.substr(1, eval.length() - 1))); - } - } - else if (eval.substr(0, 2) == "-M") { - //std::cout << "eval=" << eval << std::endl; - return -mate_in(stoi(eval.substr(2, eval.length() - 2))); - } - else if (eval.substr(0, 2) == "+M") { - //std::cout << "eval=" << eval << std::endl; - return mate_in(stoi(eval.substr(2, eval.length() - 2))); - } - else { - char *endptr; - double value = strtod(eval.c_str(), &endptr); - - if (*endptr != '\0') { - success = false; - return VALUE_ZERO; - } - else { - return Value(value * 
static_cast(PawnValueEg)); - } - } -} - -void convert_bin_from_pgn_extract(const vector& filenames, const string& output_file_name, const bool pgn_eval_side_to_move) -{ - std::cout << "pgn_eval_side_to_move=" << pgn_eval_side_to_move << std::endl; - - auto th = Threads.main(); - auto &pos = th->rootPos; - - std::fstream ofs; - ofs.open(output_file_name, ios::out | ios::binary); - - int game_count = 0; - int fen_count = 0; - - for (auto filename : filenames) { - std::cout << now_string() << " convert " << filename << std::endl; - ifstream ifs; - ifs.open(filename); - - int game_result = 0; - - std::string line; - while (std::getline(ifs, line)) { - - if (line.empty()) { - continue; - } - - else if (line.substr(0, 1) == "[") { - std::regex pattern_result(R"(\[Result (.+?)\])"); - std::smatch match; - - // example: [Result "1-0"] - if (std::regex_search(line, match, pattern_result)) { - game_result = parse_game_result_from_pgn_extract(match.str(1)); - //std::cout << "game_result=" << game_result << std::endl; - - game_count++; - if (game_count % 10000 == 0) { - std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl; - } - } - - continue; - } - - else { - int gamePly = 0; - bool first = true; - - PackedSfenValue psv; - memset((char*)&psv, 0, sizeof(PackedSfenValue)); - - auto itr = line.cbegin(); - - while (true) { - gamePly++; - - std::regex pattern_bracket(R"(\{(.+?)\})"); - - std::regex pattern_eval1(R"(\[\%eval (.+?)\])"); - std::regex pattern_eval2(R"((.+?)\/)"); - - // very slow - //std::regex pattern_eval1(R"(\[\%eval (#?[+-]?(?:\d+\.?\d*|\.\d+))\])"); - //std::regex pattern_eval2(R"((#?[+-]?(?:\d+\.?\d*|\.\d+)\/))"); - - std::regex pattern_move(R"((.+?)\{)"); - std::smatch match; - - // example: { [%eval 0.25] [%clk 0:10:00] } - // example: { +0.71/22 1.2s } - // example: { book } - if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) { - break; - } - - itr += match.position(0) + match.length(0); - 
std::string str_eval_clk = match.str(1); - trim(str_eval_clk); - //std::cout << "str_eval_clk="<< str_eval_clk << std::endl; - - if (str_eval_clk == "book") { - //std::cout << "book" << std::endl; - - // example: { rnbqkbnr/pppppppp/8/8/8/4P3/PPPP1PPP/RNBQKBNR b KQkq - 0 1 } - if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) { - break; - } - itr += match.position(0) + match.length(0); - continue; - } - - // example: [%eval 0.25] - // example: [%eval #-4] - // example: [%eval #3] - // example: +0.71/ - if (std::regex_search(str_eval_clk, match, pattern_eval1) || - std::regex_search(str_eval_clk, match, pattern_eval2)) { - std::string str_eval = match.str(1); - trim(str_eval); - //std::cout << "str_eval=" << str_eval << std::endl; - - bool success = false; - psv.score = Math::clamp(parse_score_from_pgn_extract(str_eval, success), -VALUE_MATE , VALUE_MATE); - //std::cout << "success=" << success << ", psv.score=" << psv.score << std::endl; - - if (!success) { - //std::cout << "str_eval=" << str_eval << std::endl; - //std::cout << "success=" << success << ", psv.score=" << psv.score << std::endl; - break; - } - } - else { - break; - } - - if (first) { - first = false; - } - else { - psv.gamePly = gamePly; - psv.game_result = game_result; - - if (pos.side_to_move() == BLACK) { - if (!pgn_eval_side_to_move) { - psv.score *= -1; - } - psv.game_result *= -1; - } - -#if 0 - std::cout << "write: " - << "score=" << psv.score - << ", move=" << psv.move - << ", gamePly=" << psv.gamePly - << ", game_result=" << (int)psv.game_result - << std::endl; -#endif - - ofs.write((char*)&psv, sizeof(PackedSfenValue)); - memset((char*)&psv, 0, sizeof(PackedSfenValue)); - - fen_count++; - } - - // example: { rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR b KQkq d3 0 1 } - if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) { - break; - } - - itr += match.position(0) + match.length(0); - std::string str_fen = match.str(1); - trim(str_fen); - //std::cout << 
"str_fen=" << str_fen << std::endl; - - StateInfo si; - pos.set(str_fen, false, &si, th); - pos.sfen_pack(psv.sfen); - - // example: d7d5 { - if (!std::regex_search(itr, line.cend(), match, pattern_move)) { - break; - } - - itr += match.position(0) + match.length(0) - 1; - std::string str_move = match.str(1); - trim(str_move); - //std::cout << "str_move=" << str_move << std::endl; - psv.move = UCI::to_move(pos, str_move); - } - - game_result = 0; - } - } - } - - std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl; - std::cout << now_string() << " all done" << std::endl; - ofs.close(); -} - -void convert_plain(const vector& filenames, const string& output_file_name) -{ - Position tpos; - std::ofstream ofs; - ofs.open(output_file_name, ios::app); - auto th = Threads.main(); - for (auto filename : filenames) { - std::cout << "convert " << filename << " ... "; - - // Just convert packedsfenvalue to text - std::fstream fs; - fs.open(filename, ios::in | ios::binary); - PackedSfenValue p; - while (true) - { - if (fs.read((char*)&p, sizeof(PackedSfenValue))) { - StateInfo si; - tpos.set_from_packed_sfen(p.sfen, &si, th, false); - - // write as plain text - ofs << "fen " << tpos.fen() << std::endl; - ofs << "move " << UCI::move(Move(p.move), false) << std::endl; - ofs << "score " << p.score << std::endl; - ofs << "ply " << int(p.gamePly) << std::endl; - ofs << "result " << int(p.game_result) << std::endl; - ofs << "e" << std::endl; - } - else { - break; - } - } - fs.close(); - std::cout << "done" << std::endl; - } - ofs.close(); - std::cout << "all done" << std::endl; -} - -// Learning from the generated game record -void learn(Position&, istringstream& is) -{ - auto thread_num = (int)Options["Threads"]; - SfenReader sr(thread_num); - - LearnerThink learn_think(sr); - vector filenames; - - // mini_batch_size 1M aspect by default. This can be increased. 
- auto mini_batch_size = LEARN_MINI_BATCH_SIZE; - - // Number of loops (read the game record file this number of times) - int loop = 1; - - // Game file storage folder (get game file with relative path from here) - string base_dir; - - string target_dir; - - // If 0, it will be the default value. - double eta1 = 0.0; - double eta2 = 0.0; - double eta3 = 0.0; - uint64_t eta1_epoch = 0; // eta2 is not applied by default - uint64_t eta2_epoch = 0; // eta3 is not applied by default - -#if defined(USE_GLOBAL_OPTIONS) - // Save it for later restore. - auto oldGlobalOptions = GlobalOptions; - // If you hit the eval hash, you can not calculate rmse etc. so turn it off. - GlobalOptions.use_eval_hash = false; - // If you hit the replacement table, pruning may occur at the previous evaluation value, so turn it off. - GlobalOptions.use_hash_probe = false; -#endif - - // --- Function that only shuffles the teacher aspect - - // normal shuffle - bool shuffle_normal = false; - uint64_t buffer_size = 20000000; - // fast shuffling assuming each file is shuffled - bool shuffle_quick = false; - // A function to read the entire file in memory and shuffle it. (Requires file size memory) - bool shuffle_on_memory = false; - // Conversion of packed sfen. In plain, it consists of sfen(string), evaluation value (integer), move (eg 7g7f, string), result (loss-1, win 1, draw 0) - bool use_convert_plain = false; - // convert plain format teacher to Yaneura King's bin - bool use_convert_bin = false; - int ply_minimum = 0; - int ply_maximum = 114514; - bool interpolate_eval = 0; - // convert teacher in pgn-extract format to Yaneura King's bin - bool use_convert_bin_from_pgn_extract = false; - bool pgn_eval_side_to_move = false; - // File name to write in those cases (default is "shuffled_sfen.bin") - string output_file_name = "shuffled_sfen.bin"; - - // If the absolute value of the evaluation value in the deep search of the teacher phase exceeds this value, that phase is discarded. 
- int eval_limit = 32000; - - // Flag to save the evaluation function file only once near the end. - bool save_only_once = false; - - // Shuffle about what you are pre-reading on the teacher aspect. (Shuffle of about 10 million phases) - // Turn on if you want to pass a pre-shuffled file. - bool no_shuffle = false; - -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - // elmo lambda - ELMO_LAMBDA = 0.33; - ELMO_LAMBDA2 = 0.33; - ELMO_LAMBDA_LIMIT = 32000; -#endif - - // Discount rate. If this is set to a value other than 0, the slope will be added even at other than the PV termination. (At that time, apply this discount rate) - double discount_rate = 0; - - // if (gamePly freeze = {}; - -#if defined(EVAL_NNUE) - uint64_t nn_batch_size = 1000; - double newbob_decay = 1.0; - int newbob_num_trials = 2; - string nn_options; -#endif - - uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL; - uint64_t loss_output_interval = 0; - uint64_t mirror_percentage = 0; - - string validation_set_file_name; - - // Assume the filenames are staggered. - while (true) - { - string option; - is >> option; - - if (option == "") - break; - - // specify the number of phases of mini-batch - if (option == "bat") - { - is >> mini_batch_size; - mini_batch_size *= 10000; // Unit is ten thousand - } - - // Specify the folder in which the game record is stored and make it the rooting target. 
- else if (option == "targetdir") is >> target_dir; - - // Specify the number of loops - else if (option == "loop") is >> loop; - - // Game file storage folder (get game file with relative path from here) - else if (option == "basedir") is >> base_dir; - - // Mini batch size - else if (option == "batchsize") is >> mini_batch_size; - - // learning rate - else if (option == "eta") is >> eta1; - else if (option == "eta1") is >> eta1; // alias - else if (option == "eta2") is >> eta2; - else if (option == "eta3") is >> eta3; - else if (option == "eta1_epoch") is >> eta1_epoch; - else if (option == "eta2_epoch") is >> eta2_epoch; - else if (option == "use_draw_in_training") is >> use_draw_in_training; - else if (option == "use_draw_in_validation") is >> use_draw_in_validation; - else if (option == "use_hash_in_training") is >> use_hash_in_training; - // Discount rate - else if (option == "discount_rate") is >> discount_rate; - - // No learning of KK/KKP/KPP/KPPP. - else if (option == "freeze_kk") is >> freeze[0]; - else if (option == "freeze_kkp") is >> freeze[1]; - else if (option == "freeze_kpp") is >> freeze[2]; - -#if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA) - -#elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES) - else if (option == "freeze_kppp") is >> freeze[3]; -#elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT) - else if (option == "freeze_kkpp") is >> freeze[3]; -#endif - -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - // LAMBDA - else if (option == "lambda") is >> ELMO_LAMBDA; - else if (option == "lambda2") is >> ELMO_LAMBDA2; - else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT; - -#endif - else if (option == "reduction_gameply") is >> reduction_gameply; - - // shuffle related - else if (option == "shuffle") shuffle_normal = true; - else if (option == "buffer_size") is >> buffer_size; - else if (option == "shuffleq") shuffle_quick = true; - else if (option == 
"shufflem") shuffle_on_memory = true; - else if (option == "output_file_name") is >> output_file_name; - - else if (option == "eval_limit") is >> eval_limit; - else if (option == "save_only_once") save_only_once = true; - else if (option == "no_shuffle") no_shuffle = true; - -#if defined(EVAL_NNUE) - else if (option == "nn_batch_size") is >> nn_batch_size; - else if (option == "newbob_decay") is >> newbob_decay; - else if (option == "newbob_num_trials") is >> newbob_num_trials; - else if (option == "nn_options") is >> nn_options; -#endif - else if (option == "eval_save_interval") is >> eval_save_interval; - else if (option == "loss_output_interval") is >> loss_output_interval; - else if (option == "mirror_percentage") is >> mirror_percentage; - else if (option == "validation_set_file_name") is >> validation_set_file_name; - - // Rabbit convert related - else if (option == "convert_plain") use_convert_plain = true; - else if (option == "convert_bin") use_convert_bin = true; - else if (option == "interpolate_eval") is >> interpolate_eval; - else if (option == "convert_bin_from_pgn-extract") use_convert_bin_from_pgn_extract = true; - else if (option == "pgn_eval_side_to_move") is >> pgn_eval_side_to_move; - - // Otherwise, it's a filename. - else - filenames.push_back(option); - } - if (loss_output_interval == 0) - loss_output_interval = LEARN_RMSE_OUTPUT_INTERVAL * mini_batch_size; - - cout << "learn command , "; - - // Issue a warning if OpenMP is disabled. -#if !defined(_OPENMP) - cout << "Warning! OpenMP disabled." << endl; -#endif - - // Display learning game file - if (target_dir != "") - { - string kif_base_dir = Path::Combine(base_dir, target_dir); - - // Remove this folder. Keep it relative to base_dir. -#if defined(_MSC_VER) - // If you use std::tr2, warning C4996 will appear, so suppress it. - // * std::tr2 issued a deprecation warning by default under std:c++14, and was deleted by default in /std:c++17. 
- #pragma warning(push) - #pragma warning(disable:4996) - - namespace sys = std::filesystem; - sys::path p(kif_base_dir); // Origin of enumeration - std::for_each(sys::directory_iterator(p), sys::directory_iterator(), - [&](const sys::path& p) { - if (sys::is_regular_file(p)) - filenames.push_back(Path::Combine(target_dir, p.filename().generic_string())); - }); - #pragma warning(pop) - -#elif defined(__GNUC__) - - auto ends_with = [](std::string const & value, std::string const & ending) - { - if (ending.size() > value.size()) return false; - return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); - }; - - // It can't be helped, so read it using dirent.h. - DIR *dp; // pointer to directory - dirent* entry; // entry point returned by readdir() - - dp = opendir(kif_base_dir.c_str()); - if (dp != NULL) - { - do { - entry = readdir(dp); - // Only list files ending with ".bin" - // →I hate this restriction when generating files with serial numbers... - if (entry != NULL && ends_with(entry->d_name, ".bin") ) - { - //cout << entry->d_name << endl; - filenames.push_back(Path::Combine(target_dir, entry->d_name)); - } - } while (entry != NULL); - closedir(dp); - } -#endif - } - - cout << "learn from "; - for (auto s : filenames) - cout << s << " , "; - cout << endl; - if (!validation_set_file_name.empty()) - { - cout << "validation set : " << validation_set_file_name << endl; - } - - cout << "base dir : " << base_dir << endl; - cout << "target dir : " << target_dir << endl; - - // shuffle mode - if (shuffle_normal) - { - cout << "buffer_size : " << buffer_size << endl; - cout << "shuffle mode.." << endl; - shuffle_files(filenames,output_file_name , buffer_size); - return; - } - if (shuffle_quick) - { - cout << "quick shuffle mode.." << endl; - shuffle_files_quick(filenames, output_file_name); - return; - } - if (shuffle_on_memory) - { - cout << "shuffle on memory.." 
<< endl; - shuffle_files_on_memory(filenames,output_file_name); - return; - } - if (use_convert_plain) - { - init_nnue(true); - cout << "convert_plain.." << endl; - convert_plain(filenames, output_file_name); - return; - } - if (use_convert_bin) - { - init_nnue(true); - cout << "convert_bin.." << endl; - convert_bin(filenames,output_file_name, ply_minimum, ply_maximum, interpolate_eval); - return; - - } - if (use_convert_bin_from_pgn_extract) - { - init_nnue(true); - cout << "convert_bin_from_pgn-extract.." << endl; - convert_bin_from_pgn_extract(filenames, output_file_name, pgn_eval_side_to_move); - return; - } - - cout << "loop : " << loop << endl; - cout << "eval_limit : " << eval_limit << endl; - cout << "save_only_once : " << (save_only_once ? "true" : "false") << endl; - cout << "no_shuffle : " << (no_shuffle ? "true" : "false") << endl; - - // Insert the file name for the number of loops. - for (int i = 0; i < loop; ++i) - // sfen reader, I'll read it in reverse order so I'll reverse it here. I'm sorry. 
- for (auto it = filenames.rbegin(); it != filenames.rend(); ++it) - sr.filenames.push_back(Path::Combine(base_dir, *it)); - -#if !defined(EVAL_NNUE) - cout << "Gradient Method : " << LEARN_UPDATE << endl; -#endif - cout << "Loss Function : " << LOSS_FUNCTION << endl; - cout << "mini-batch size : " << mini_batch_size << endl; -#if defined(EVAL_NNUE) - cout << "nn_batch_size : " << nn_batch_size << endl; - cout << "nn_options : " << nn_options << endl; -#endif - cout << "learning rate : " << eta1 << " , " << eta2 << " , " << eta3 << endl; - cout << "eta_epoch : " << eta1_epoch << " , " << eta2_epoch << endl; -#if defined(EVAL_NNUE) - if (newbob_decay != 1.0) { - cout << "scheduling : newbob with decay = " << newbob_decay - << ", " << newbob_num_trials << " trials" << endl; - } else { - cout << "scheduling : default" << endl; - } -#endif - cout << "discount rate : " << discount_rate << endl; - - // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1. - reduction_gameply = max(reduction_gameply, 1); - cout << "reduction_gameply : " << reduction_gameply << endl; - -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - cout << "LAMBDA : " << ELMO_LAMBDA << endl; - cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl; - cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl; -#endif - cout << "mirror_percentage : " << mirror_percentage << endl; - cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; - cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl; - -#if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA) - cout << "freeze_kk/kkp/kpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << endl; -#elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES) - cout << "freeze_kk/kkp/kpp/kppp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; -#elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT) - 
cout << "freeze_kk/kkp/kpp/kkpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; -#endif - - // ----------------------------------- - // various initialization - // ----------------------------------- - - cout << "init.." << endl; - - // Read evaluation function parameters - init_nnue(true); - -#if !defined(EVAL_NNUE) - cout << "init_grad.." << endl; - - // Initialize gradient array of merit function parameters - Eval::init_grad(eta1,eta1_epoch,eta2,eta2_epoch,eta3); -#else - cout << "init_training.." << endl; - Eval::NNUE::InitializeTraining(eta1,eta1_epoch,eta2,eta2_epoch,eta3); - Eval::NNUE::SetBatchSize(nn_batch_size); - Eval::NNUE::SetOptions(nn_options); - if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) { - learn_think.best_nn_directory = std::string(Options["EvalDir"]); - } -#endif - -#if 0 - // A test to give a gradient of 1.0 to the initial stage of Hirate. - pos.set_hirate(); - cout << Eval::evaluate(pos) << endl; - //Eval::print_eval_stat(pos); - Eval::add_grad(pos, BLACK, 32.0 , false); - Eval::update_weights(1); - pos.state()->sum.p[2][0] = VALUE_NOT_EVALUATED; - cout << Eval::evaluate(pos) << endl; - //Eval::print_eval_stat(pos); -#endif - - cout << "init done." << endl; - - // Reflect other option settings. 
- learn_think.discount_rate = discount_rate; - learn_think.eval_limit = eval_limit; - learn_think.save_only_once = save_only_once; - learn_think.sr.no_shuffle = no_shuffle; - learn_think.freeze = freeze; - learn_think.reduction_gameply = reduction_gameply; -#if defined(EVAL_NNUE) - learn_think.newbob_scale = 1.0; - learn_think.newbob_decay = newbob_decay; - learn_think.newbob_num_trials = newbob_num_trials; -#endif - learn_think.eval_save_interval = eval_save_interval; - learn_think.loss_output_interval = loss_output_interval; - learn_think.mirror_percentage = mirror_percentage; - - // Start a thread that loads the phase file in the background - // (If this is not started, mse cannot be calculated.) - learn_think.start_file_read_worker(); - - learn_think.mini_batch_size = mini_batch_size; - - if (validation_set_file_name.empty()) { - // Get about 10,000 data for mse calculation. - sr.read_for_mse(); - } else { - sr.read_validation_set(validation_set_file_name, eval_limit); - } - - // Calculate rmse once at this point (timing of 0 sfen) - // sr.calc_rmse(); -#if defined(EVAL_NNUE) - if (newbob_decay != 1.0) { - learn_think.calc_loss(0, -1); - learn_think.best_loss = learn_think.latest_loss_sum / learn_think.latest_loss_count; - learn_think.latest_loss_sum = 0.0; - learn_think.latest_loss_count = 0; - cout << "initial loss: " << learn_think.best_loss << endl; - } -#endif - - // ----------------------------------- - // start learning evaluation function parameters - // ----------------------------------- - - // Start learning. - learn_think.go_think(); - - // Save once at the end. - learn_think.save(true); - -#if defined(USE_GLOBAL_OPTIONS) - // Restore Global Options. 
- GlobalOptions = oldGlobalOptions; -#endif -} - - -} // namespace Learner - -#if defined(GENSFEN2019) -#include "gensfen2019.cpp" -#endif - - -#endif // EVAL_LEARN diff --git a/src/learn/learning_tools.cpp b/src/learn/learning_tools.cpp deleted file mode 100644 index 4bcecab8..00000000 --- a/src/learn/learning_tools.cpp +++ /dev/null @@ -1,256 +0,0 @@ -#include "learning_tools.h" - -#if defined (EVAL_LEARN) - -#if defined(_OPENMP) -#include -#endif -#include "../misc.h" - -using namespace Eval; - -namespace EvalLearningTools -{ - - // --- static variables - - double Weight::eta; - double Weight::eta1; - double Weight::eta2; - double Weight::eta3; - uint64_t Weight::eta1_epoch; - uint64_t Weight::eta2_epoch; - - std::vector min_index_flag; - - // --- initialization for each individual table - - void init_min_index_flag() - { - // Initialization of mir_piece and inv_piece must be completed. - assert(mir_piece(Eval::f_pawn) == Eval::e_pawn); - - // Initialize the flag array for dimension reduction - // Not involved in KPPP. - - KK g_kk; - g_kk.set(SQUARE_NB, Eval::fe_end, 0); - KKP g_kkp; - g_kkp.set(SQUARE_NB, Eval::fe_end, g_kk.max_index()); - KPP g_kpp; - g_kpp.set(SQUARE_NB, Eval::fe_end, g_kkp.max_index()); - - uint64_t size = g_kpp.max_index(); - min_index_flag.resize(size); - -#pragma omp parallel - { -#if defined(_OPENMP) - // To prevent the logical 64 cores from being used when there are two CPUs under Windows - // explicitly assign to CPU here - int thread_index = omp_get_thread_num(); // get your thread number - WinProcGroup::bindThisThread(thread_index); -#endif - -#pragma omp for schedule(dynamic,20000) - - for (int64_t index_ = 0; index_ < (int64_t)size; ++index_) - { - // It seems that the loop variable must be a sign type due to OpenMP restrictions, but - // It's really difficult to use. 
- uint64_t index = (uint64_t)index_; - - if (g_kk.is_ok(index)) - { - // Make sure that the original index will be restored by conversion from index and reverse conversion. - // It is a process that is executed only once at startup, so write it in assert. - assert(g_kk.fromIndex(index).toIndex() == index); - - KK a[KK_LOWER_COUNT]; - g_kk.fromIndex(index).toLowerDimensions(a); - - // Make sure that the first element of dimension reduction is the same as the original index. - assert(a[0].toIndex() == index); - - uint64_t min_index = UINT64_MAX; - for (auto& e : a) - min_index = std::min(min_index, e.toIndex()); - min_index_flag[index] = (min_index == index); - } - else if (g_kkp.is_ok(index)) - { - assert(g_kkp.fromIndex(index).toIndex() == index); - - KKP x = g_kkp.fromIndex(index); - KKP a[KKP_LOWER_COUNT]; - x.toLowerDimensions(a); - - assert(a[0].toIndex() == index); - - uint64_t min_index = UINT64_MAX; - for (auto& e : a) - min_index = std::min(min_index, e.toIndex()); - min_index_flag[index] = (min_index == index); - } - else if (g_kpp.is_ok(index)) - { - assert(g_kpp.fromIndex(index).toIndex() == index); - - KPP x = g_kpp.fromIndex(index); - KPP a[KPP_LOWER_COUNT]; - x.toLowerDimensions(a); - - assert(a[0].toIndex() == index); - - uint64_t min_index = UINT64_MAX; - for (auto& e : a) - min_index = std::min(min_index, e.toIndex()); - min_index_flag[index] = (min_index == index); - } - else - { - assert(false); - } - } - } - } - - void learning_tools_unit_test_kpp() - { - - // test KPP triangulation for bugs - // All combinations of k-p0-p1 are properly handled by KPP, and the dimension reduction at that time is - // Determine if it is correct. 
- - KK g_kk; - g_kk.set(SQUARE_NB, Eval::fe_end, 0); - KKP g_kkp; - g_kkp.set(SQUARE_NB, Eval::fe_end, g_kk.max_index()); - KPP g_kpp; - g_kpp.set(SQUARE_NB, Eval::fe_end, g_kkp.max_index()); - - std::vector f; - f.resize(g_kpp.max_index() - g_kpp.min_index()); - - for(auto k = SQUARE_ZERO ; k < SQUARE_NB ; ++k) - for(auto p0 = BonaPiece::BONA_PIECE_ZERO; p0 < fe_end ; ++p0) - for (auto p1 = BonaPiece::BONA_PIECE_ZERO; p1 < fe_end; ++p1) - { - KPP kpp_org = g_kpp.fromKPP(k,p0,p1); - KPP kpp0; - KPP kpp1 = g_kpp.fromKPP(Mir(k), mir_piece(p0), mir_piece(p1)); - KPP kpp_array[2]; - - auto index = kpp_org.toIndex(); - assert(g_kpp.is_ok(index)); - - kpp0 = g_kpp.fromIndex(index); - - //if (kpp0 != kpp_org) - // std::cout << "index = " << index << "," << kpp_org << "," << kpp0 << std::endl; - - kpp0.toLowerDimensions(kpp_array); - - assert(kpp_array[0] == kpp0); - assert(kpp0 == kpp_org); - assert(kpp_array[1] == kpp1); - - auto index2 = kpp1.toIndex(); - f[index - g_kpp.min_index()] = f[index2-g_kpp.min_index()] = true; - } - - // Check if there is no missing index. - for(size_t index = 0 ; index < f.size(); index++) - if (!f[index]) - { - std::cout << index << g_kpp.fromIndex(index + g_kpp.min_index()) << std::endl; - } - } - - void learning_tools_unit_test_kppp() - { - // Test for missing KPPP calculations - - KPPP g_kppp; - g_kppp.set(15, Eval::fe_end,0); - uint64_t min_index = g_kppp.min_index(); - uint64_t max_index = g_kppp.max_index(); - - // Confirm last element. - //KPPP x = KPPP::fromIndex(max_index-1); - //std::cout << x << std::endl; - - for (uint64_t index = min_index; index < max_index; ++index) - { - KPPP x = g_kppp.fromIndex(index); - //std::cout << x << std::endl; - -#if 0 - if ((index % 10000000) == 0) - std::cout << "index = " << index << std::endl; - - // index = 9360000000 - // done. 
- - if (x.toIndex() != index) - { - std::cout << "assertion failed , index = " << index << std::endl; - } -#endif - - assert(x.toIndex() == index); - -// ASSERT((&kppp_ksq_pcpcpc(x.king(), x.piece0(), x.piece1(), x.piece2()) - &kppp[0][0]) == (index - min_index)); - } - - } - - void learning_tools_unit_test_kkpp() - { - KKPP g_kkpp; - g_kkpp.set(SQUARE_NB, 10000, 0); - uint64_t n = 0; - for (int k = 0; k - -#include "../eval/evaluate_mir_inv_tools.h" - -#if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE) -#include "../misc.h" // PRNG , my_insertion_sort -#endif - -#include // std::sqrt() - -namespace EvalLearningTools -{ - // ------------------------------------------------- - // Initialization - // ------------------------------------------------- - - // Initialize the tables in this EvalLearningTools namespace. - // Be sure to call once before learning starts. - // In this function, we also call init_mir_inv_tables(). - // (It is not necessary to call init_mir_inv_tables() when calling this function.) - void init(); - - // ------------------------------------------------- - // flags - // ------------------------------------------------- - - // When the dimension is lowered, it may become the smallest index among them - // A flag array that is true for the known index. - // This array is also initialized by init(). - // KPPP is not involved. - // Therefore, the valid index range of this array is from KK::min_index() to KPP::max_index(). - extern std::vector min_index_flag; - - // ------------------------------------------------- - // Array for learning that stores gradients etc. - // ------------------------------------------------- - -#if defined(_MSC_VER) -#pragma pack(push,2) -#elif defined(__GNUC__) -#pragma pack(2) -#endif - struct Weight - { - // cumulative value of one mini-batch gradient - LearnFloatType g = LearnFloatType(0); - - // When ADA_GRAD_UPDATE. 
LearnFloatType == float, - // total 4*2 + 4*2 + 1*2 = 18 bytes - // It suffices to secure a Weight array that is 4.5 times the size of the evaluation function parameter of 1GB. - // However, sizeof(Weight)==20 code is generated if the structure alignment is in 4-byte units, so - // Specify pragma pack(2). - - // For SGD_UPDATE, this structure is reduced by 10 bytes to 8 bytes. - - // Learning rate η(eta) such as AdaGrad. - // It is assumed that eta1,2,3,eta1_epoch,eta2_epoch have been set by the time updateFV() is called. - // The epoch of update_weights() gradually changes from eta1 to eta2 until eta1_epoch. - // After eta2_epoch, gradually change from eta2 to eta3. - static double eta; - static double eta1; - static double eta2; - static double eta3; - static uint64_t eta1_epoch; - static uint64_t eta2_epoch; - - // Batch initialization of eta. If 0 is passed, the default value will be set. - static void init_eta(double eta1, double eta2, double eta3, uint64_t eta1_epoch, uint64_t eta2_epoch) - { - Weight::eta1 = (eta1 != 0) ? eta1 : 30.0; - Weight::eta2 = (eta2 != 0) ? eta2 : 30.0; - Weight::eta3 = (eta3 != 0) ? eta3 : 30.0; - Weight::eta1_epoch = (eta1_epoch != 0) ? eta1_epoch : 0; - Weight::eta2_epoch = (eta2_epoch != 0) ? eta2_epoch : 0; - } - - // Set eta according to epoch. 
- static void calc_eta(uint64_t epoch) - { - if (Weight::eta1_epoch == 0) // Exclude eta2 - Weight::eta = Weight::eta1; - else if (epoch < Weight::eta1_epoch) - // apportion - Weight::eta = Weight::eta1 + (Weight::eta2 - Weight::eta1) * epoch / Weight::eta1_epoch; - else if (Weight::eta2_epoch == 0) // Exclude eta3 - Weight::eta = Weight::eta2; - else if (epoch < Weight::eta2_epoch) - Weight::eta = Weight::eta2 + (Weight::eta3 - Weight::eta2) * (epoch - Weight::eta1_epoch) / (Weight::eta2_epoch - Weight::eta1_epoch); - else - Weight::eta = Weight::eta3; - } - - template void updateFV(T& v) { updateFV(v, 1.0); } - -#if defined (ADA_GRAD_UPDATE) - - // Since the maximum value that can be accurately calculated with float is INT16_MAX*256-1 - // Keep the small value as a marker. - const LearnFloatType V0_NOT_INIT = (INT16_MAX * 128); - - // What holds v internally. The previous implementation kept a fixed decimal with only a fractional part to save memory, - // Since it is doubtful in accuracy and the visibility is bad, it was abolished. - LearnFloatType v0 = LearnFloatType(V0_NOT_INIT); - - // AdaGrad g2 - LearnFloatType g2 = LearnFloatType(0); - - // update with AdaGrad - // When executing this function, the value of g and the member do not change - // Guaranteed by the caller. It does not have to be an atomic operation. - // k is a coefficient for eta. 1.0 is usually sufficient. If you want to lower eta for your turn item, set this to 1/8.0 etc. - template - void updateFV(T& v,double k) - { - // AdaGrad update formula - // Gradient vector is g, vector to be updated is v, η(eta) is a constant, - // g2 = g2 + g^2 - // v = v - ηg/sqrt(g2) - - constexpr double epsilon = 0.000001; - - if (g == LearnFloatType(0)) - return; - - g2 += g * g; - - // If v0 is V0_NOT_INIT, it means that the value is not initialized with the value of KK/KKP/KPP array, - // In this case, read the value of v from the one passed in the argument. - double V = (v0 == V0_NOT_INIT) ? 
v : v0; - - V -= k * eta * (double)g / sqrt((double)g2 + epsilon); - - // Limit the value of V to be within the range of types. - // By the way, windows.h defines the min and max macros, so to avoid it, - // Here, it is enclosed in parentheses so that it is not treated as a function-like macro. - V = (std::min)((double)(std::numeric_limits::max)() , V); - V = (std::max)((double)(std::numeric_limits::min)() , V); - - v0 = (LearnFloatType)V; - v = (T)round(V); - - // Clear g because one update of mini-batch for this element is over - // g[i] = 0; - // → There is a problem of dimension reduction, so this will be done by the caller. - } - -#elif defined(SGD_UPDATE) - - // See only the sign of the gradient Update with SGD - // When executing this function, the value of g and the member do not change - // Guaranteed by the caller. It does not have to be an atomic operation. - template - void updateFV(T & v , double k) - { - if (g == 0) - return; - - // See only the sign of g and update. - // If g <0, add v a little. - // If g> 0, subtract v slightly. - - // Since we only add integers, no decimal part is required. - - // It's a good idea to move around 0-5. - // It is better to have a Gaussian distribution, so generate a 5-bit random number (each bit has a 1/2 probability of 1), - // Pop_count() it. At this time, it has a binomial distribution. - //int16_t diff = (int16_t)POPCNT32((u32)prng.rand(31)); - // → If I do this with 80 threads, this AsyncPRNG::rand() locks, so I slowed down. This implementation is not good. 
- int16_t diff = 1; - - double V = v; - if (g > 0.0) - V-= diff; - else - V+= diff; - - V = (std::min)((double)(std::numeric_limits::max)(), V); - V = (std::max)((double)(std::numeric_limits::min)(), V); - - v = (T)V; - } - -#endif - - // grad setting - template void set_grad(const T& g_) { g = g_; } - - // Add grad - template void add_grad(const T& g_) { g += g_; } - - LearnFloatType get_grad() const { return g; } - }; -#if defined(_MSC_VER) -#pragma pack(pop) -#elif defined(__GNUC__) -#pragma pack(0) -#endif - - // Turned weight array - // In order to be able to handle it transparently, let's have the same member as Weight. - struct Weight2 - { - Weight w[2]; - - //Evaluate your turn, eta 1/8. - template void updateFV(std::array& v) { w[0].updateFV(v[0] , 1.0); w[1].updateFV(v[1],1.0/8.0); } - - template void set_grad(const std::array& g) { for (int i = 0; i<2; ++i) w[i].set_grad(g[i]); } - template void add_grad(const std::array& g) { for (int i = 0; i<2; ++i) w[i].add_grad(g[i]); } - - std::array get_grad() const { return std::array{w[0].get_grad(), w[1].get_grad()}; } - }; - - // ------------------------------------------------ - - // A helper that calculates the index when the Weight array is serialized. - // ------------------------------------------------ - - - // Base class for KK,KKP,KPP,KKPP - // How to use these classes - // - // 1. Initialize with set() first. Example) KK g_kk; g_kk.set(SQUARE_NB,fe_end,0); - // 2. Next create an instance with fromIndex(), fromKK(), etc. - // 3. Access using properties such as king(), piece0(), piece1(). - // - // It may be difficult to understand just by this explanation, but if you look at init_grad(), add_grad(), update_weights() etc. in the learning part - // I think you can understand it including the necessity. - // - // Note: this derived class may indirectly reference the above inv_piece/mir_piece for dimension reduction, so - // Initialize by calling EvalLearningTools::init() or init_mir_inv_tables() first. 
- // - // Remarks) /*final*/ is written for the function name that should not be overridden on the derived class side. - // The function that should be overridden on the derived class side is a pure virtual function with "= 0". - // Only virtual functions are added to the derived class that may or may not be overridden. - // - struct SerializerBase - { - - // Minimum value and maximum value of serial number +1 when serializing KK, KKP, KPP arrays. - /*final*/ uint64_t min_index() const { return min_index_; } - /*final*/ uint64_t max_index() const { return min_index() + max_raw_index_; } - - // max_index() - min_index() the value of. - // Calculate the value from max_king_sq_,fe_end_ etc. on the derived class side and return it. - virtual uint64_t size() const = 0; - - // Determine if the given index is more than min_index() and less than max_index(). - /*final*/ bool is_ok(uint64_t index) { return min_index() <= index && index < max_index(); } - - // Make sure to call this set(). Otherwise, construct an instance using fromKK()/fromIndex() etc. on the derived class side. - virtual void set(int max_king_sq, uint64_t fe_end, uint64_t min_index) - { - max_king_sq_ = max_king_sq; - fe_end_ = fe_end; - min_index_ = min_index; - max_raw_index_ = size(); - } - - // Get the index when serialized, based on the value of the current member. - /*final*/ uint64_t toIndex() const { - return min_index() + toRawIndex(); - } - - // Returns the index when serializing. (The value of min_index() is before addition) - virtual uint64_t toRawIndex() const = 0; - - protected: - // The value of min_index() returned by this class - uint64_t min_index_; - - // The value of max_index() returned by this class = min_index() + max_raw_index_ - // This variable is calculated by size() of the derived class. 
- uint64_t max_raw_index_; - - // The number of balls to support (normally SQUARE_NB) - int max_king_sq_; - - // Maximum BonaPiece value supported - uint64_t fe_end_; - - }; - - struct KK : public SerializerBase - { - protected: - KK(Square king0, Square king1,bool inverse) : king0_(king0), king1_(king1) , inverse_sign(inverse) {} - public: - KK() {} - - virtual uint64_t size() const { return max_king_sq_ * max_king_sq_; } - - // builder that creates KK object from index (serial number) - KK fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // builder that creates KK object from raw_index (number starting from 0, not serial number) - KK fromRawIndex(uint64_t raw_index) const - { - int king1 = (int)(raw_index % SQUARE_NB); - raw_index /= SQUARE_NB; - int king0 = (int)(raw_index /* % SQUARE_NB */); - assert(king0 < SQUARE_NB); - return fromKK((Square)king0, (Square)king1 , false); - } - KK fromKK(Square king0, Square king1 , bool inverse) const - { - // The variable name kk is used in the Eval::kk array etc., so it needs to be different. (The same applies to KKP, KPP classes, etc.) - KK my_kk(king0, king1, inverse); - my_kk.set(max_king_sq_, fe_end_, min_index()); - return my_kk; - } - KK fromKK(Square king0, Square king1) const { return fromKK(king0, king1, false); } - - // When you construct this object using fromIndex(), you can get information with the following accessors. - Square king0() const { return king0_; } - Square king1() const { return king1_; } - -// number of dimension reductions -#if defined(USE_KK_INVERSE_WRITE) - #define KK_LOWER_COUNT 4 -#elif defined(USE_KK_MIRROR_WRITE) - #define KK_LOWER_COUNT 2 -#else - #define KK_LOWER_COUNT 1 -#endif - -#if defined(USE_KK_INVERSE_WRITE) && !defined(USE_KK_MIRROR_WRITE) - // USE_KK_INVERSE_WRITE If you use it, please also define USE_KK_MIRROR_WRITE. 
- static_assert(false, "define also USE_KK_MIRROR_WRITE!"); -#endif - - // Get the index of the low-dimensional array. - // When USE_KK_INVERSE_WRITE is enabled, the inverse of them will be in [2] and [3]. - // Note that the sign of grad must be reversed for this dimension reduction. - // You can use is_inverse() because it can be determined. - void toLowerDimensions(/*out*/KK kk_[KK_LOWER_COUNT]) const { - kk_[0] = fromKK(king0_, king1_,false); -#if defined(USE_KK_MIRROR_WRITE) - kk_[1] = fromKK(Mir(king0_),Mir(king1_),false); -#if defined(USE_KK_INVERSE_WRITE) - kk_[2] = fromKK(Inv(king1_), Inv(king0_),true); - kk_[3] = fromKK(Inv(Mir(king1_)) , Inv(Mir(king0_)),true); -#endif -#endif - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - return (uint64_t)king0_ * (uint64_t)max_king_sq_ + (uint64_t)king1_; - } - - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - bool is_inverse() const { - return inverse_sign; - } - - // When is_inverse() == true, reverse the sign that is not grad's turn and return it. - template - std::array apply_inverse_sign(const std::array& rhs) - { - return !is_inverse() ? rhs : std::array{-rhs[0], rhs[1]}; - } - - // comparison operator - bool operator==(const KK& rhs) { return king0() == rhs.king0() && king1() == rhs.king1(); } - bool operator!=(const KK& rhs) { return !(*this == rhs); } - - private: - Square king0_, king1_ ; - bool inverse_sign; - }; - - // Output for debugging. - static std::ostream& operator<<(std::ostream& os, KK rhs) - { - os << "KK(" << rhs.king0() << "," << rhs.king1() << ")"; - return os; - } - - // Same as KK. For KKP. 
- struct KKP : public SerializerBase - { - protected: - KKP(Square king0, Square king1, Eval::BonaPiece p) : king0_(king0), king1_(king1), piece_(p), inverse_sign(false) {} - KKP(Square king0, Square king1, Eval::BonaPiece p, bool inverse) : king0_(king0), king1_(king1), piece_(p),inverse_sign(inverse) {} - public: - KKP() {} - - virtual uint64_t size() const { return (uint64_t)max_king_sq_*(uint64_t)max_king_sq_*(uint64_t)fe_end_; } - - // builder that creates KKP object from index (serial number) - KKP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // A builder that creates a KKP object from raw_index (a number that starts from 0, not a serial number) - KKP fromRawIndex(uint64_t raw_index) const - { - int piece = (int)(raw_index % Eval::fe_end); - raw_index /= Eval::fe_end; - int king1 = (int)(raw_index % SQUARE_NB); - raw_index /= SQUARE_NB; - int king0 = (int)(raw_index /* % SQUARE_NB */); - assert(king0 < SQUARE_NB); - return fromKKP((Square)king0, (Square)king1, (Eval::BonaPiece)piece,false); - } - - KKP fromKKP(Square king0, Square king1, Eval::BonaPiece p, bool inverse) const - { - KKP my_kkp(king0, king1, p, inverse); - my_kkp.set(max_king_sq_,fe_end_,min_index()); - return my_kkp; - } - KKP fromKKP(Square king0, Square king1, Eval::BonaPiece p) const { return fromKKP(king0, king1, p, false); } - - // When you construct this object using fromIndex(), you can get information with the following accessors. 
- Square king0() const { return king0_; } - Square king1() const { return king1_; } - Eval::BonaPiece piece() const { return piece_; } - - // Number of KKP dimension reductions -#if defined(USE_KKP_INVERSE_WRITE) - #define KKP_LOWER_COUNT 4 -#elif defined(USE_KKP_MIRROR_WRITE) - #define KKP_LOWER_COUNT 2 -#else - #define KKP_LOWER_COUNT 1 -#endif - -#if defined(USE_KKP_INVERSE_WRITE) && !defined(USE_KKP_MIRROR_WRITE) - // USE_KKP_INVERSE_WRITE If you use it, please also define USE_KKP_MIRROR_WRITE. - static_assert(false, "define also USE_KKP_MIRROR_WRITE!"); -#endif - - // Get the index of the low-dimensional array. The mirrored one is returned to kkp_[1]. - // When USE_KKP_INVERSE_WRITE is enabled, the inverse of them will be in [2] and [3]. - // Note that the sign of grad must be reversed for this dimension reduction. - // You can use is_inverse() because it can be determined. - void toLowerDimensions(/*out*/ KKP kkp_[KKP_LOWER_COUNT]) const { - kkp_[0] = fromKKP(king0_, king1_, piece_,false); -#if defined(USE_KKP_MIRROR_WRITE) - kkp_[1] = fromKKP(Mir(king0_), Mir(king1_), mir_piece(piece_),false); -#if defined(USE_KKP_INVERSE_WRITE) - kkp_[2] = fromKKP( Inv(king1_), Inv(king0_), inv_piece(piece_),true); - kkp_[3] = fromKKP( Inv(Mir(king1_)), Inv(Mir(king0_)) , inv_piece(mir_piece(piece_)),true); -#endif -#endif - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - return ((uint64_t)king0_ * (uint64_t)max_king_sq_ + (uint64_t)king1_) * (uint64_t)fe_end_ + (uint64_t)piece_; - } - - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - bool is_inverse() const { - return inverse_sign; - } - - // When is_inverse() == true, reverse the sign that is not grad's turn and return it. - template - std::array apply_inverse_sign(const std::array& rhs) - { - return !is_inverse() ? 
rhs : std::array{-rhs[0], rhs[1]}; - } - - // comparison operator - bool operator==(const KKP& rhs) { return king0() == rhs.king0() && king1() == rhs.king1() && piece() == rhs.piece(); } - bool operator!=(const KKP& rhs) { return !(*this == rhs); } - - private: - Square king0_, king1_; - Eval::BonaPiece piece_; - bool inverse_sign; - }; - - // Output for debugging. - static std::ostream& operator<<(std::ostream& os, KKP rhs) - { - os << "KKP(" << rhs.king0() << "," << rhs.king1() << "," << rhs.piece() << ")"; - return os; - } - - - // Same as KK and KKP. For KPP - struct KPP : public SerializerBase - { - protected: - KPP(Square king, Eval::BonaPiece p0, Eval::BonaPiece p1) : king_(king), piece0_(p0), piece1_(p1) {} - - public: - KPP() {} - - // The minimum and maximum KPP values ​​of serial numbers when serializing KK, KKP, KPP arrays. -#if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - virtual uint64_t size() const { return (uint64_t)max_king_sq_*(uint64_t)fe_end_*(uint64_t)fe_end_; } -#else - // Triangularize the square array part of [fe_end][fe_end] of kpp[SQUARE_NB][fe_end][fe_end]. - // If kpp[SQUARE_NB][triangle_fe_end], the first row of this triangular array has one element, the second row has two elements, and so on. - // hence triangle_fe_end = 1 + 2 + .. + fe_end = fe_end * (fe_end + 1) / 2 - virtual uint64_t size() const { return (uint64_t)max_king_sq_*(uint64_t)triangle_fe_end; } -#endif - - virtual void set(int max_king_sq, uint64_t fe_end, uint64_t min_index) - { - // This value is used in size(), and size() is used in SerializerBase::set(), so calculate first. 
- triangle_fe_end = (uint64_t)fe_end*((uint64_t)fe_end + 1) / 2; - - SerializerBase::set(max_king_sq, fe_end, min_index); - } - - // builder that creates KPP object from index (serial number) - KPP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // A builder that creates KPP objects from raw_index (a number that starts from 0, not a serial number) - KPP fromRawIndex(uint64_t raw_index) const - { - const uint64_t triangle_fe_end = (uint64_t)fe_end_*((uint64_t)fe_end_ + 1) / 2; - -#if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - int piece1 = (int)(raw_index % fe_end_); - raw_index /= fe_end_; - int piece0 = (int)(raw_index % fe_end_); - raw_index /= fe_end_; -#else - uint64_t index2 = raw_index % triangle_fe_end; - - // Write the expression to find piece0, piece1 from index2 here. - // This is the inverse function of index2 = i * (i+1) / 2 + j. - // If j = 0, i^2 + i-2 * index2 == 0 - // From the solution formula of the quadratic equation i = (sqrt(8*index2+1)-1) / 2. - // After i is converted into an integer, j can be calculated as j = index2-i * (i + 1) / 2. - - // BonaPiece assumes 32bit (may not fit in 16bit), so this multiplication must be 64bit. - int piece1 = int(sqrt(8 * index2 + 1) - 1) / 2; - int piece0 = int(index2 - (uint64_t)piece1*((uint64_t)piece1 + 1) / 2); - - assert(piece1 < (int)fe_end_); - assert(piece0 < (int)fe_end_); - assert(piece0 > piece1); - - raw_index /= triangle_fe_end; -#endif - int king = (int)(raw_index /* % SQUARE_NB */); - assert(king < max_king_sq_); - return fromKPP((Square)king, (Eval::BonaPiece)piece0, (Eval::BonaPiece)piece1); - } - - KPP fromKPP(Square king, Eval::BonaPiece p0, Eval::BonaPiece p1) const - { - KPP my_kpp(king, p0, p1); - my_kpp.set(max_king_sq_,fe_end_,min_index()); - return my_kpp; - } - - // When you construct this object using fromIndex(), you can get information with the following accessors. 
- Square king() const { return king_; } - Eval::BonaPiece piece0() const { return piece0_; } - Eval::BonaPiece piece1() const { return piece1_; } - - -// number of dimension reductions -#if defined(USE_KPP_MIRROR_WRITE) - #if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - #define KPP_LOWER_COUNT 4 - #else - #define KPP_LOWER_COUNT 2 - #endif -#else - #if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - #define KPP_LOWER_COUNT 2 - #else - #define KPP_LOWER_COUNT 1 - #endif -#endif - - // Get the index of the low-dimensional array. The ones with p1 and p2 swapped, the ones mirrored, etc. are returned. - void toLowerDimensions(/*out*/ KPP kpp_[KPP_LOWER_COUNT]) const { - -#if defined(USE_TRIANGLE_WEIGHT_ARRAY) - // Note that if you use a triangular array, the swapped piece0 and piece1 will not be returned. - kpp_[0] = fromKPP(king_, piece0_, piece1_); -#if defined(USE_KPP_MIRROR_WRITE) - kpp_[1] = fromKPP(Mir(king_), mir_piece(piece0_), mir_piece(piece1_)); -#endif - -#else - // When not using triangular array - kpp_[0] = fromKPP(king_, piece0_, piece1_); - kpp_[1] = fromKPP(king_, piece1_, piece0_); -#if defined(USE_KPP_MIRROR_WRITE) - kpp_[2] = fromKPP(Mir(king_), mir_piece(piece0_), mir_piece(piece1_)); - kpp_[3] = fromKPP(Mir(king_), mir_piece(piece1_), mir_piece(piece0_)); -#endif -#endif - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - -#if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - - return ((uint64_t)king_ * (uint64_t)fe_end_ + (uint64_t)piece0_) * (uint64_t)fe_end_ + (uint64_t)piece1_; - -#else - // Macro similar to that used in Bonanza 6.0 - auto PcPcOnSq = [&](Square k, Eval::BonaPiece i, Eval::BonaPiece j) - { - - // (i,j) in this triangular array is the element in the i-th row and the j-th column. - // 1st row + 2 + ... + i = i * (i+1) / 2 because the i-th row and 0th column is the total of the elements up to that point - // The i-th row and the j-th column is j plus this. 
i*(i+1)/2+j - - // BonaPiece type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow. - return (uint64_t)k * triangle_fe_end + (uint64_t)(uint64_t(i)*(uint64_t(i)+1) / 2 + uint64_t(j)); - }; - - auto k = king_; - auto i = piece0_; - auto j = piece1_; - - return (i >= j) ? PcPcOnSq(k, i, j) : PcPcOnSq(k, j, i); -#endif - } - - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - // Prepared to match KK, KKP and interface. This method always returns false for this KPP class. - bool is_inverse() const { - return false; - } - - // comparison operator - bool operator==(const KPP& rhs) { - return king() == rhs.king() && - ((piece0() == rhs.piece0() && piece1() == rhs.piece1()) -#if defined(USE_TRIANGLE_WEIGHT_ARRAY) - // When using a triangular array, allow swapping of piece0 and piece1. - || (piece0() == rhs.piece1() && piece1() == rhs.piece0()) -#endif - ); } - bool operator!=(const KPP& rhs) { return !(*this == rhs); } - - - private: - Square king_; - Eval::BonaPiece piece0_, piece1_; - - uint64_t triangle_fe_end; // = (uint64_t)fe_end_*((uint64_t)fe_end_ + 1) / 2; - }; - - // Output for debugging. - static std::ostream& operator<<(std::ostream& os, KPP rhs) - { - os << "KPP(" << rhs.king() << "," << rhs.piece0() << "," << rhs.piece1() << ")"; - return os; - } - - // 4 pieces related to KPPP. However, if there is a turn and you do not consider mirrors etc., memory of 2 TB or more is required for learning. - // Even if you use a triangular array, you need 50GB x 12 bytes = 600GB for learning. - // It takes about half as much as storing only the mirrored one. - // Here, the triangular array is always used and the mirrored one is stored. - // - // Also, king() of this class is not limited to Square of the actual king, but a value from 0 to (king_sq-1) is simply returned. - // This needs to be converted to an appropriate ball position on the user side when performing compression using a mirror. 
- // - // Later, regarding the pieces0,1,2 returned by this class, - // piece0() >piece1() >piece2() - // It is, and it is necessary to keep this constraint when passing piece0,1,2 in the constructor. - struct KPPP : public SerializerBase - { - protected: - KPPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1, Eval::BonaPiece p2) : - king_(king), piece0_(p0), piece1_(p1), piece2_(p2) - { - assert(piece0_ > piece1_ && piece1_ > piece2_); - /* sort_piece(); */ - } - - public: - KPPP() {} - - virtual uint64_t size() const { return (uint64_t)max_king_sq_*triangle_fe_end; } - - // Set fe_end and king_sq. - // fe_end: fe_end assumed by this KPPP class - // king_sq: Number of balls to handle in KPPP. - // 3 layers x 3 mirrors = 3 layers x 5 lines = 15 - // 2 steps x 2 mirrors without mirror = 18 - // Set this first using set() on the side that uses this KPPP class. - virtual void set(int max_king_sq, uint64_t fe_end,uint64_t min_index) { - // This value is used in size(), and size() is used in SerializerBase::set(), so calculate first. - triangle_fe_end = fe_end * (fe_end - 1) * (fe_end - 2) / 6; - - SerializerBase::set(max_king_sq, fe_end, min_index); - } - - // number of dimension reductions - // For the time being, the dimension reduction of the mirror is not supported. I wonder if I'll do it here... -/* -#if defined(USE_KPPP_MIRROR_WRITE) -#define KPPP_LOWER_COUNT 2 -#else -#define KPPP_LOWER_COUNT 1 -#endif -*/ -#define KPPP_LOWER_COUNT 1 - - // Get the index of the low-dimensional array. - // Note that the one with p0,p1,p2 swapped will not be returned. - // Also, the mirrored one is returned only when USE_KPPP_MIRROR_WRITE is enabled. - void toLowerDimensions(/*out*/ KPPP kppp_[KPPP_LOWER_COUNT]) const - { - kppp_[0] = fromKPPP(king_, piece0_, piece1_,piece2_); -#if KPPP_LOWER_COUNT > 1 - // If mir_piece is done, it will be in a state not sorted. Need code to sort. 
- Eval::BonaPiece p_list[3] = { mir_piece(piece2_), mir_piece(piece1_), mir_piece(piece0_) }; - my_insertion_sort(p_list, 0, 3); - kppp_[1] = fromKPPP((int)Mir((Square)king_), p_list[2] , p_list[1], p_list[0]); -#endif - } - - // builder that creates KPPP object from index (serial number) - KPPP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // A builder that creates KPPP objects from raw_index (a number that starts from 0, not a serial number) - KPPP fromRawIndex(uint64_t raw_index) const - { - uint64_t index2 = raw_index % triangle_fe_end; - - // Write the expression to find piece0, piece1, piece2 from index2 here. - // This is the inverse function of index2 = i(i-1)(i-2)/6-1 + j(j+1)/2 + k. - // For j = k = 0, the real root is i = ... from the solution formula of the cubic equation. (The following formula) - // However, if index2 is 0 or 1, there are multiple real solutions. You have to consider this. It is necessary to take measures against insufficient calculation accuracy. - // After i is calculated, i can be converted into an integer, then put in the first expression and then j can be calculated in the same way as in KPP. - - // This process is a relatively difficult numerical calculation. Various ideas are needed. - - int piece0; - if (index2 <= 1) - { - // There are multiple real solutions only when index2 == 0,1. - piece0 = (int)index2 + 2; - - } else { - - //double t = pow(sqrt((243 *index2 * index2-1) * 3) + 27 * index2, 1.0 / 3); - // → In this case, the content of sqrt() will overflow if index2 becomes large. - - // Since the contents of sqrt() overflow, do not multiply 3.0 in sqrt, but multiply sqrt(3.0) outside sqrt. - // Since the contents of sqrt() will overflow, use an approximate expression when index2 is large. 
- - double t; - - if (index2 < 100000000) - t = pow(sqrt((243.0 *index2 * index2 - 1)) * sqrt(3.0) + 27 * index2, 1.0 / 3); - else - // If index2 is very large, we can think of the contents of sqrt as approximately √243 * index2. - t = pow( index2 * sqrt(243 * 3.0) + 27 * index2, 1.0 / 3); - - // Add deltas to avoid a slight calculation error when rounding. - // If it is too large, it may increase by 1 so adjustment is necessary. - - const double delta = 0.000000001; - - piece0 = int(t / pow(3.0, 2.0 / 3) + 1.0 / (pow(3.0, 1.0 / 3) * t) + delta) + 1; - // Uuu. Is it really like this? ('Ω`) - } - - //Since piece2 is obtained, substitute piece2 for i of i(i-1)(i-2)/6 (=a) in the above formula. Also substitute k = 0. - // j(j+1)/2 = index2-a - // This is from the solution formula of the quadratic equation.. - - uint64_t a = (uint64_t)piece0*((uint64_t)piece0 - 1)*((uint64_t)piece0 - 2) / 6; - int piece1 = int((1 + sqrt(8.0 * (index2 - a ) + 1)) / 2); - uint64_t b = (uint64_t)piece1 * (piece1 - 1) / 2; - int piece2 = int(index2 - a - b); - -#if 0 - if (!((piece0 > piece1 && piece1 > piece2))) - { - std::cout << index << " , " << index2 << "," << a << "," << sqrt(8.0 * (index2 - a) + 1); - } -#endif - - assert(piece0 > piece1 && piece1 > piece2); - - assert(piece2 < (int)fe_end_); - assert(piece1 < (int)fe_end_); - assert(piece0 < (int)fe_end_); - - raw_index /= triangle_fe_end; - - int king = (int)(raw_index /* % SQUARE_NB */); - assert(king < max_king_sq_); - - // Propagate king_sq and fe_end. - return fromKPPP((Square)king, (Eval::BonaPiece)piece0, (Eval::BonaPiece)piece1 , (Eval::BonaPiece)piece2); - } - - // Specify k,p0,p1,p2 to build KPPP instance. - // The king_sq and fe_end passed by set() which is internally retained are inherited. 
- KPPP fromKPPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1, Eval::BonaPiece p2) const - { - KPPP kppp(king, p0, p1, p2); - kppp.set(max_king_sq_, fe_end_,min_index()); - return kppp; - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - - // Macro similar to the one used in Bonanza 6.0 - // Precondition) i> j> k. - // NG in case of i==j,j==k. - auto PcPcPcOnSq = [this](int king, Eval::BonaPiece i, Eval::BonaPiece j , Eval::BonaPiece k) - { - // (i,j,k) in this triangular array is the element in the i-th row and the j-th column. - // 0th row 0th column 0th is the sum of the elements up to that point, so 0 + 0 + 1 + 3 + 6 + ... + (i)*(i-1)/2 = i*( i-1)*(i-2)/6 - // i-th row, j-th column, 0-th is j with j added. + j*(j-1) / 2 - // i-th row, j-th column and k-th row is k plus it. + k - assert(i > j && j > k); - - // BonaPiece type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow. - return (uint64_t)king * triangle_fe_end + (uint64_t)( - uint64_t(i)*(uint64_t(i) - 1) * (uint64_t(i) - 2) / 6 - + uint64_t(j)*(uint64_t(j) - 1) / 2 - + uint64_t(k) - ); - }; - - return PcPcPcOnSq(king_, piece0_, piece1_, piece2_); - } - - // When you construct this object using fromIndex(), you can get information with the following accessors. - int king() const { return king_; } - Eval::BonaPiece piece0() const { return piece0_; } - Eval::BonaPiece piece1() const { return piece1_; } - Eval::BonaPiece piece2() const { return piece2_; } - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - // Prepared to match KK, KKP and interface. This method always returns false for this KPPP class. - bool is_inverse() const { - return false; - } - - // Returns the number of elements in a triangular array. It is assumed that the kppp array is the following two-dimensional array. 
- // kppp[king_sq][triangle_fe_end]; - uint64_t get_triangle_fe_end() const { return triangle_fe_end; } - - // comparison operator - bool operator==(const KPPP& rhs) { - // piece0> piece1> piece2 is assumed, so there is no possibility of replacement. - return king() == rhs.king() && piece0() == rhs.piece0() && piece1() == rhs.piece1() && piece2() == rhs.piece2(); - } - bool operator!=(const KPPP& rhs) { return !(*this == rhs); } - - private: - - int king_; - Eval::BonaPiece piece0_, piece1_,piece2_; - - // The part of the square array of [fe_end][fe_end][fe_end] of kppp[king_sq][fe_end][fe_end][fe_end] is made into a triangular array. - // If kppp[king_sq][triangle_fe_end], the number of elements from the 0th row of this triangular array is 0,0,1,3,..., The nth row is n(n-1)/2. - // therefore, - // triangle_fe_end = Σn(n-1)/2 , n=0..fe_end-1 - // = fe_end * (fe_end - 1) * (fe_end - 2) / 6 - uint64_t triangle_fe_end; // ((uint64_t)Eval::fe_end)*((uint64_t)Eval::fe_end - 1)*((uint64_t)Eval::fe_end - 2) / 6; - }; - - // Output for debugging. - static std::ostream& operator<<(std::ostream& os, KPPP rhs) - { - os << "KPPP(" << rhs.king() << "," << rhs.piece0() << "," << rhs.piece1() << "," << rhs.piece2() << ")"; - return os; - } - - // For learning about 4 pieces by KKPP. - // - // Same design as KPPP class. In KPPP class, treat as one with less p. - // The positions of the two balls are encoded as values ​​from 0 to king_sq-1. - // - // Later, regarding the pieces0 and 1 returned by this class, - // piece0() >piece1() - // It is, and it is necessary to keep this constraint even when passing piece0,1 in the constructor. - // - // Due to this constraint, BonaPieceZero cannot be assigned to piece0 and piece1 at the same time and passed. - // If you want to support learning of dropped frames, you need to devise with evaluate(). 
- struct KKPP: SerializerBase - { - protected: - KKPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1) : - king_(king), piece0_(p0), piece1_(p1) - { - assert(piece0_ > piece1_); - /* sort_piece(); */ - } - - public: - KKPP() {} - - virtual uint64_t size() const { return (uint64_t)max_king_sq_*triangle_fe_end; } - - // Set fe_end and king_sq. - // fe_end: fe_end assumed by this KPPP class - // king_sq: Number of balls to handle in KPPP. - // 9 steps x mirrors 9 steps x 5 squared squares (balls before and after) = 45*45 = 2025. - // Set this first using set() on the side that uses this KKPP class. - void set(int max_king_sq, uint64_t fe_end , uint64_t min_index) { - // This value is used in size(), and size() is used in SerializerBase::set(), so calculate first. - triangle_fe_end = fe_end * (fe_end - 1) / 2; - - SerializerBase::set(max_king_sq, fe_end, min_index); - } - - // number of dimension reductions - // For the time being, the dimension reduction of the mirror is not supported. I wonder if I'll do it here... (Because the memory for learning is a waste) -#define KKPP_LOWER_COUNT 1 - - // Get the index of the low-dimensional array. - //Note that the one with p0,p1,p2 swapped will not be returned. - // Also, the mirrored one is returned only when USE_KPPP_MIRROR_WRITE is enabled. - void toLowerDimensions(/*out*/ KKPP kkpp_[KPPP_LOWER_COUNT]) const - { - kkpp_[0] = fromKKPP(king_, piece0_, piece1_); - - // When mirroring, mir_piece will not be sorted. Need code to sort. - // We also need to define a mirror for king_. 
- } - - // builder that creates KKPP object from index (serial number) - KKPP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // builder that creates KKPP object from raw_index (number starting from 0, not serial number) - KKPP fromRawIndex(uint64_t raw_index) const - { - uint64_t index2 = raw_index % triangle_fe_end; - - // Write the expression to find piece0, piece1, piece2 from index2 here. - // This is the inverse function of index2 = i(i-1)/2 + j. - // Use the formula of the solution of the quadratic equation with j=0. - // When index2=0, it is a double root, but the smaller one does not satisfy i>j and is ignored. - - int piece0 = (int(sqrt(8 * index2 + 1)) + 1)/2; - int piece1 = int(index2 - piece0 * (piece0 - 1) /2 ); - - assert(piece0 > piece1); - - assert(piece1 < (int)fe_end_); - assert(piece0 < (int)fe_end_); - - raw_index /= triangle_fe_end; - - int king = (int)(raw_index /* % SQUARE_NB */); - assert(king < max_king_sq_); - - // Propagate king_sq and fe_end. - return fromKKPP(king, (Eval::BonaPiece)piece0, (Eval::BonaPiece)piece1); - } - - // Specify k,p0,p1 to build KKPP instance. - // The king_sq and fe_end passed by set() which is internally retained are inherited. - KKPP fromKKPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1) const - { - KKPP kkpp(king, p0, p1); - kkpp.set(max_king_sq_, fe_end_,min_index()); - return kkpp; - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - - // Macro similar to the one used in Bonanza 6.0 - // Precondition) i> j. - // NG in case of i==j,j==k. - auto PcPcOnSq = [this](int king, Eval::BonaPiece i, Eval::BonaPiece j) - { - assert(i > j); - - // BonaPiece type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow. 
- return (uint64_t)king * triangle_fe_end + (uint64_t)( - + uint64_t(i)*(uint64_t(i) - 1) / 2 - + uint64_t(j) - ); - }; - - return PcPcOnSq(king_, piece0_, piece1_); - } - - // When you construct this object using fromIndex(), fromKKPP(), you can get information with the following accessors. - int king() const { return king_; } - Eval::BonaPiece piece0() const { return piece0_; } - Eval::BonaPiece piece1() const { return piece1_; } - - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - // Prepared to match KK, KKP and interface. In this KKPP class, this method always returns false. - bool is_inverse() const { - return false; - } - - //Returns the number of elements in a triangular array. It is assumed that the kkpp array is the following two-dimensional array. - // kkpp[king_sq][triangle_fe_end]; - uint64_t get_triangle_fe_end() const { return triangle_fe_end; } - - // comparison operator - bool operator==(const KKPP& rhs) { - // Since piece0> piece1 is assumed, there is no possibility of replacement. - return king() == rhs.king() && piece0() == rhs.piece0() && piece1() == rhs.piece1(); - } - bool operator!=(const KKPP& rhs) { return !(*this == rhs); } - - private: - - int king_; - Eval::BonaPiece piece0_, piece1_; - - // Triangularize the square array part of [fe_end][fe_end] of kppp[king_sq][fe_end][fe_end]. - uint64_t triangle_fe_end = 0; - - }; - - // Output for debugging. 
- static std::ostream& operator<<(std::ostream& os, KKPP rhs) - { - os << "KKPP(" << rhs.king() << "," << rhs.piece0() << "," << rhs.piece1() << ")"; - return os; - } - - -} - -#endif // defined (EVAL_LEARN) -#endif diff --git a/src/learn/multi_think.cpp b/src/learn/multi_think.cpp deleted file mode 100644 index ba2c47d4..00000000 --- a/src/learn/multi_think.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include "../types.h" - -#if defined(EVAL_LEARN) - -#include "multi_think.h" -#include "../tt.h" -#include "../uci.h" - -#include - -void MultiThink::go_think() -{ - // Keep a copy to restore the Options settings later. - auto oldOptions = Options; - - // When using the constant track, it takes a lot of time to perform on the fly & the part to access the file is - // Since it is not thread safe, it is guaranteed here that it is being completely read in memory. - Options["BookOnTheFly"] = std::string("false"); - - // Read evaluation function, etc. - // In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so - // Skip memory corruption check. - init_nnue(true); - - // Call the derived class's init(). - init(); - - // The loop upper limit is set with set_loop_max(). - loop_count = 0; - done_count = 0; - - // Create threads as many as Options["Threads"] and start thinking. - std::vector threads; - auto thread_num = (size_t)Options["Threads"]; - - // Secure end flag of worker thread - thread_finished.resize(thread_num); - - // start worker thread - for (size_t i = 0; i < thread_num; ++i) - { - thread_finished[i] = 0; - threads.push_back(std::thread([i, this] - { - // exhaust all processor threads. 
- WinProcGroup::bindThisThread(i); - - // execute the overridden process - this->thread_worker(i); - - // Set the end flag because the thread has ended - this->thread_finished[i] = 1; - })); - } - - // wait for all threads to finish - // for (auto& th :threads) - // th.join(); - // If you write like, the thread will rush here while it is still working, - // During that time, callback_func() cannot be called and you cannot save. - // Therefore, you need to check the end flag yourself. - - // function to determine if all threads have finished - auto threads_done = [&]() - { - // returns false if no one is finished - for (auto& f : thread_finished) - if (!f) - return false; - return true; - }; - - // Call back if the callback function is set. - auto do_a_callback = [&]() - { - if (callback_func) - callback_func(); - }; - - - for (uint64_t i = 0 ; ; ) - { - // If all threads have finished, exit the loop. - if (threads_done()) - break; - - sleep(1000); - - // callback_func() is called every callback_seconds. - if (++i == callback_seconds) - { - do_a_callback(); - // Since I am returning from ↑, I reset the counter, so - // no matter how long it takes to save() etc. in do_a_callback() - // The next call will take a certain amount of time. - i = 0; - } - } - - // Last save. - std::cout << std::endl << "finalize.."; - - // do_a_callback(); - // → It should be saved by the caller, so I feel that it is not necessary here. - - // It is possible that the exit code of the thread is running but the exit code of the thread is running, so - // We need to wait for the end with join(). - for (auto& th : threads) - th.join(); - - // The file writing thread etc. are still running only when all threads are finished - // Since the work itself may not have completed, output only that all threads have finished. - std::cout << "all threads are joined." << std::endl; - - // Restored because Options were rewritten. 
- // Restore the handler because the handler will not start unless you assign a value. - for (auto& s : oldOptions) - Options[s.first] = std::string(s.second); - -} - - -#endif // defined(EVAL_LEARN) diff --git a/src/learn/multi_think.h b/src/learn/multi_think.h deleted file mode 100644 index 55edb049..00000000 --- a/src/learn/multi_think.h +++ /dev/null @@ -1,151 +0,0 @@ -#ifndef _MULTI_THINK_ -#define _MULTI_THINK_ - -#if defined(EVAL_LEARN) - -#include - -#include "../misc.h" -#include "../learn/learn.h" -#include "../thread_win32_osx.h" - -#include - -// Learning from a game record, when making yourself think and generating a fixed track, etc. -// Helper class used when multiple threads want to call Search::think() individually. -// Derive and use this class. -struct MultiThink -{ - MultiThink() : prng(21120903) - { - loop_count = 0; - } - - // Call this function from the master thread, each thread will think, - // Return control when the thought ending condition is satisfied. - // Do something else. - // ・It is safe for each thread to call Learner::search(),qsearch() - // Separates the substitution table for each thread. (It will be restored after the end.) - // ・Book is not thread safe when in on the fly mode, so temporarily change this mode. - // Turn it off. - // [Requirements] - // 1) Override thread_worker() - // 2) Set the loop count with set_loop_max() - // 3) set a function to be called back periodically (if necessary) - // callback_func and callback_interval - void go_think(); - - // If there is something you want to initialize on the derived class side, override this, - // Called when initialization is completed with go_think(). - // It is better to read the fixed trace at that timing. - virtual void init() {} - - // A thread worker that is called by creating a thread when you go_think() - // Override and use this. - virtual void thread_worker(size_t thread_id) = 0; - - // Called back every callback_seconds [seconds] when go_think(). 
- std::function callback_func; - uint64_t callback_seconds = 600; - - // Set the number of times worker processes (calls Search::think()). - void set_loop_max(uint64_t loop_max_) { loop_max = loop_max_; } - - // Get the value set by set_loop_max(). - uint64_t get_loop_max() const { return loop_max; } - - // [ASYNC] Take the value of the loop counter and add the loop counter after taking it out. - // If the loop counter has reached loop_max, return UINT64_MAX. - // If you want to generate a phase, you must call this function at the time of generating the phase, - // Please note that the number of generated phases and the value of the counter will not match. - uint64_t get_next_loop_count() { - std::unique_lock lk(loop_mutex); - if (loop_count >= loop_max) - return UINT64_MAX; - return loop_count++; - } - - // [ASYNC] For returning the processed number. Each time it is called, it returns a counter that is incremented. - uint64_t get_done_count() { - std::unique_lock lk(loop_mutex); - return ++done_count; - } - - // Mutex when worker thread accesses I/O - std::mutex io_mutex; - -protected: - // Random number generator body - AsyncPRNG prng; - -private: - // number of times worker processes (calls Search::think()) - std::atomic loop_max; - // number of times the worker has processed (calls Search::think()) - std::atomic loop_count; - // To return the number of times it has been processed. - std::atomic done_count; - - // Mutex when changing the variables in ↑ - std::mutex loop_mutex; - - // Thread end flag. - // vector may not be reflected properly when trying to rewrite from multiple threads... - typedef uint8_t Flag; - std::vector thread_finished; - -}; - -// Mechanism to process task during idle time. -// master passes the task with push_task_async() whenever you like. -// When slave executes on_idle() in its spare time, it retrieves one task and continues execution until there is no queue. 
-// Convenient to use when you want to write MultiThink thread worker in master-slave method. -struct TaskDispatcher -{ - typedef std::function Task; - - // slave calls this function during idle. - void on_idle(size_t thread_id) - { - Task task; - while ((task = get_task_async()) != nullptr) - task(thread_id); - - sleep(1); - } - - // Stack [ASYNC] task. - void push_task_async(Task task) - { - std::unique_lock lk(task_mutex); - tasks.push_back(task); - } - - // Allocate size array elements for task in advance. - void task_reserve(size_t size) - { - tasks.reserve(size); - } - -protected: - // set of tasks - std::vector tasks; - - // Take out one [ASYNC] task. Called from on_idle(). - Task get_task_async() - { - std::unique_lock lk(task_mutex); - if (tasks.size() == 0) - return nullptr; - Task task = *tasks.rbegin(); - tasks.pop_back(); - return task; - } - - // a mutex for accessing tasks - std::mutex task_mutex; -}; - -#endif // defined(EVAL_LEARN) && defined(YANEURAOU_2018_OTAFUKU_ENGINE) - -#endif diff --git a/src/misc.cpp b/src/misc.cpp index 865e21fb..f46c5dd2 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -42,7 +42,6 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); #endif #include -#include #include #include #include @@ -140,7 +139,7 @@ const string engine_info(bool to_uci) { string month, day, year; stringstream ss, date(__DATE__); // From compiler, format is "Sep 21 2008" - ss << "Stockfish+NNUE " << Version << setfill('0'); + ss << "Stockfish NNUE " << Version << setfill('0'); if (Version.empty()) { @@ -151,7 +150,7 @@ const string engine_info(bool to_uci) { ss << (Is64Bit ? " 64" : "") << (HasPext ? " BMI2" : (HasPopCnt ? " POPCNT" : "")) << (to_uci ? "\nid author ": " by ") - << "T. Romstad, M. Costalba, J. Kiiski, G. Linscott, H. Noda, Y. Nasu, M. Isozaki"; + << "T. Romstad, M. Costalba, J. Kiiski, G. 
Linscott"; return ss.str(); } @@ -371,8 +370,8 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { { if (mem) sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl; - //else - //sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl; + else + sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl; } firstCall = false; @@ -527,163 +526,18 @@ void bindThisThread(size_t idx) { } // namespace WinProcGroup -// Returns a string that represents the current time. (Used when learning evaluation functions) -std::string now_string() -{ - // Using std::ctime(), localtime() gives a warning that MSVC is not secure. - // This shouldn't happen in the C++ standard, but... - -#if defined(_MSC_VER) - // C4996 : 'ctime' : This function or variable may be unsafe.Consider using ctime_s instead. -#pragma warning(disable : 4996) -#endif - - auto now = std::chrono::system_clock::now(); - auto tp = std::chrono::system_clock::to_time_t(now); - auto result = string(std::ctime(&tp)); - - // remove line endings if they are included at the end - while (*result.rbegin() == '\n' || (*result.rbegin() == '\r')) - result.pop_back(); - return result; -} - -void sleep(int ms) -{ - std::this_thread::sleep_for(std::chrono::milliseconds(ms)); -} - void* aligned_malloc(size_t size, size_t align) { - void* p = _mm_malloc(size, align); - if (p == nullptr) - { - std::cout << "info string can't allocate memory. sise = " << size << std::endl; - exit(1); - } - return p; + void* p = _mm_malloc(size, align); + if (p == nullptr) + { + std::cout << "info string can't allocate memory. 
sise = " << size << std::endl; + exit(1); + } + return p; } -int read_file_to_memory(std::string filename, std::function callback_func) +void aligned_free(void* ptr) { - fstream fs(filename, ios::in | ios::binary); - if (fs.fail()) - return 1; - - fs.seekg(0, fstream::end); - uint64_t eofPos = (uint64_t)fs.tellg(); - fs.clear(); // Otherwise the next seek may fail. - fs.seekg(0, fstream::beg); - uint64_t begPos = (uint64_t)fs.tellg(); - uint64_t file_size = eofPos - begPos; - //std::cout << "filename = " << filename << " , file_size = " << file_size << endl; - - // I know the file size, so call callback_func to get a buffer for this, - // Get the pointer. - void* ptr = callback_func(file_size); - - // If the buffer could not be secured, or if the file size is different from the expected file size, - // It is supposed to return nullptr. At this time, reading is interrupted and an error is returned. - if (ptr == nullptr) - return 2; - - // read in pieces - - const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to read in one read (1GB) - for (uint64_t pos = 0; pos < file_size; pos += block_size) - { - // size to read this time - uint64_t read_size = (pos + block_size < file_size) ? block_size : (file_size - pos); - fs.read((char*)ptr + pos, read_size); - - // Read error occurred in the middle of the file. - if (fs.fail()) - return 2; - - //cout << "."; - } - fs.close(); - - return 0; + _mm_free(ptr); } - -int write_memory_to_file(std::string filename, void* ptr, uint64_t size) -{ - fstream fs(filename, ios::out | ios::binary); - if (fs.fail()) - return 1; - - const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to write in one write (1GB) - for (uint64_t pos = 0; pos < size; pos += block_size) - { - // Memory size to write this time - uint64_t write_size = (pos + block_size < size) ? 
block_size : (size - pos); - fs.write((char*)ptr + pos, write_size); - //cout << "."; - } - fs.close(); - return 0; -} - -// ---------------------------- -// mkdir wrapper -// ---------------------------- - -// Specify relative to the current folder. Returns 0 on success, non-zero on failure. -// Create a folder. Japanese is not used. -// In case of gcc under msys2 environment, folder creation fails with _wmkdir(). Cause unknown. -// Use _mkdir() because there is no help for it. - -#if defined(_WIN32) -// for Windows - -#if defined(_MSC_VER) -#include // I need this because I want wstring to mkdir -#include // This is required for wstring_convert. - -namespace Dependency { - int mkdir(std::string dir_name) - { - std::wstring_convert, wchar_t> cv; - return _wmkdir(cv.from_bytes(dir_name).c_str()); - // ::CreateDirectory(cv.from_bytes(dir_name).c_str(),NULL); - } -} - -#elif defined(__GNUC__) - -#include -namespace Dependency { - int mkdir(std::string dir_name) - { - return _mkdir(dir_name.c_str()); - } -} - -#endif -#elif defined(__linux__) - -// In the linux environment, this symbol _LINUX is defined in the makefile. - -// mkdir implementation for Linux. -#include "sys/stat.h" - -namespace Dependency { - int mkdir(std::string dir_name) - { - return ::mkdir(dir_name.c_str(), 0777); - } -} -#else - -// In order to judge whether it is a Linux environment, we have to divide the makefile.. -// The function to dig a folder on linux is good for the time being... Only used to save the evaluation function file... 
- -namespace Dependency { - int mkdir(std::string dir_name) - { - return 0; - } -} - -#endif diff --git a/src/misc.h b/src/misc.h index 0e2e8403..090f7186 100644 --- a/src/misc.h +++ b/src/misc.h @@ -21,20 +21,13 @@ #ifndef MISC_H_INCLUDED #define MISC_H_INCLUDED -#include #include #include -#include -#include #include #include #include -#ifndef _MSC_VER -#include -#endif #include "types.h" -#include "thread_win32_osx.h" const std::string engine_info(bool to_uci = false); const std::string compiler_info(); @@ -115,20 +108,8 @@ public: /// Output values only have 1/8th of their bits set on average. template T sparse_rand() { return T(rand64() & rand64() & rand64()); } - // Returns a random number from 0 to n-1. (Not uniform distribution, but this is enough in reality) - uint64_t rand(uint64_t n) { return rand() % n; } - - // Return the random seed used internally. - uint64_t get_seed() const { return s; } }; -// Display a random seed. (For debugging) -inline std::ostream& operator<<(std::ostream& os, PRNG& prng) -{ - os << "PRNG::seed = " << std::hex << prng.get_seed() << std::dec; - return os; -} - inline uint64_t mul_hi64(uint64_t a, uint64_t b) { #if defined(__GNUC__) && defined(IS_64BIT) __extension__ typedef unsigned __int128 uint128; @@ -152,155 +133,8 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) { namespace WinProcGroup { void bindThisThread(size_t idx); } -// sleep for the specified number of milliseconds. -extern void sleep(int ms); - -// Returns a string that represents the current time. (Used for log output when learning evaluation function) -std::string now_string(); - -// wrapper for end processing on the way -static void my_exit() -{ - sleep(3000); // It is bad to finish before the error message is output, so put wait. 
- exit(EXIT_FAILURE); -} - -// When compiled with gcc/clang such as msys2, Windows Subsystem for Linux, -// In C++ std::ifstream, ::read() is a wrapper for that because it is not possible to read and write files larger than 2GB in one shot. -// -// callback_func of the argument of read_file_to_memory() uses the file size as an argument when the file can be opened -// It will be called back, so if you allocate a buffer and pass a function that returns the first pointer, it will be read there. -// These functions return non-zero on error, such as when the file cannot be found. -// -// Also, if the buffer cannot be allocated in the callback function or if the file size is different from the expected file size, -// Return nullptr. At this time, read_file_to_memory() interrupts reading and returns with an error. - -int read_file_to_memory(std::string filename, std::function callback_func); -int write_memory_to_file(std::string filename, void* ptr, uint64_t size); - -// -------------------- -// async version of PRNG -// -------------------- - -// async version of PRNG -struct AsyncPRNG -{ - AsyncPRNG(uint64_t seed) : prng(seed) { assert(seed); } - // [ASYNC] Extract one random number. - template T rand() { - std::unique_lock lk(mutex); - return prng.rand(); - } - - // [ASYNC] Returns a random number from 0 to n-1. (Not uniform distribution, but this is enough in reality) - uint64_t rand(uint64_t n) { - std::unique_lock lk(mutex); - return prng.rand(n); - } - - // Return the random seed used internally. - uint64_t get_seed() const { return prng.get_seed(); } - -protected: - std::mutex mutex; - PRNG prng; -}; - -// Display a random seed. 
(For debugging) -inline std::ostream& operator<<(std::ostream& os, AsyncPRNG& prng) -{ - os << "AsyncPRNG::seed = " << std::hex << prng.get_seed() << std::dec; - return os; -} - -// -------------------- -// Math -// -------------------- - -// Mathematical function used for progress calculation and learning -namespace Math { - // Sigmoid function - // = 1.0 / (1.0 + std::exp(-x)) - double sigmoid(double x); - - // Differentiation of sigmoid function - // = sigmoid(x) * (1.0-sigmoid(x)) - double dsigmoid(double x); - - // Clip v so that it fits between [lo,hi]. - // * In Stockfish, this function is written in bitboard.h. - template constexpr const T& clamp(const T& v, const T& lo, const T& hi) { - return v < lo ? lo : v > hi ? hi : v; - } - -} - -// -------------------- -// Path -// -------------------- - -// Something like Path class in C#. File name manipulation. -// Match with the C# method name. -struct Path -{ - // Combine the path name and file name and return it. - // If the folder name is not an empty string, append it if there is no'/' or'\\' at the end. - static std::string Combine(const std::string& folder, const std::string& filename) - { - if (folder.length() >= 1 && *folder.rbegin() != '/' && *folder.rbegin() != '\\') - return folder + "/" + filename; - - return folder + filename; - } - - // Get the file name part (excluding the folder name) from the full path expression. - static std::string GetFileName(const std::string& path) - { - // I don't know which "\" or "/" is used. - auto path_index1 = path.find_last_of("\\") + 1; - auto path_index2 = path.find_last_of("/") + 1; - auto path_index = std::max(path_index1, path_index2); - - return path.substr(path_index); - } -}; extern void* aligned_malloc(size_t size, size_t align); -static void aligned_free(void* ptr) { _mm_free(ptr); } - -// It is ignored when new even though alignas is specified & because it is ignored when the STL container allocates memory, -// A custom allocator used for that. 
-template -class AlignedAllocator { -public: - using value_type = T; - - AlignedAllocator() {} - AlignedAllocator(const AlignedAllocator&) {} - AlignedAllocator(AlignedAllocator&&) {} - - template AlignedAllocator(const AlignedAllocator&) {} - - T* allocate(std::size_t n) { return (T*)aligned_malloc(n * sizeof(T), alignof(T)); } - void deallocate(T* p, std::size_t n) { aligned_free(p); } -}; - -// -------------------- -// Dependency Wrapper -// -------------------- - -namespace Dependency -{ - // In the Linux environment, if you getline() the text file is'\r\n' - // Since'\r' remains at the end, write a wrapper to remove this'\r'. - // So when calling getline() on fstream, - // just write getline() instead of std::getline() and use this function. - extern bool getline(std::ifstream& fs, std::string& s); - - // Create a folder. - // Specify relative to the current folder. Japanese is not used for dir_name. - // Returns 0 on success, non-zero on failure. - extern int mkdir(std::string dir_name); -} +extern void aligned_free(void* ptr); #endif // #ifndef MISC_H_INCLUDED diff --git a/src/movegen.h b/src/movegen.h index d5f82f16..c2e7c3f1 100644 --- a/src/movegen.h +++ b/src/movegen.h @@ -68,9 +68,6 @@ struct MoveList { return std::find(begin(), end(), move) != end(); } - // returns the i th element - const ExtMove at(size_t i) const { assert(0 <= i && i < size()); return begin()[i]; } - private: ExtMove moveList[MAX_MOVES], *last; }; diff --git a/src/eval/nnue/architectures/halfkp_256x2-32-32.h b/src/nnue/architectures/halfkp_256x2-32-32.h similarity index 82% rename from src/eval/nnue/architectures/halfkp_256x2-32-32.h rename to src/nnue/architectures/halfkp_256x2-32-32.h index 467d0222..52349ed2 100644 --- a/src/eval/nnue/architectures/halfkp_256x2-32-32.h +++ b/src/nnue/architectures/halfkp_256x2-32-32.h @@ -1,7 +1,7 @@ // Definition of input features and network structure used in NNUE evaluation function -#ifndef HALFKP_256X2_32_32_H -#define HALFKP_256X2_32_32_H 
+#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED +#define NNUE_HALFKP_256X2_32_32_H_INCLUDED #include "../features/feature_set.h" #include "../features/half_kp.h" @@ -10,9 +10,7 @@ #include "../layers/affine_transform.h" #include "../layers/clipped_relu.h" -namespace Eval { - -namespace NNUE { +namespace Eval::NNUE { // Input features used in evaluation function using RawFeatures = Features::FeatureSet< @@ -33,7 +31,6 @@ using OutputLayer = AffineTransform; using Network = Layers::OutputLayer; -} // namespace NNUE +} // namespace Eval::NNUE -} // namespace Eval -#endif // HALFKP_256X2_32_32_H +#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED diff --git a/src/eval/nnue/architectures/halfkp_384x2-32-32.h b/src/nnue/architectures/halfkp_384x2-32-32.h similarity index 82% rename from src/eval/nnue/architectures/halfkp_384x2-32-32.h rename to src/nnue/architectures/halfkp_384x2-32-32.h index 3d28139a..a29d481d 100644 --- a/src/eval/nnue/architectures/halfkp_384x2-32-32.h +++ b/src/nnue/architectures/halfkp_384x2-32-32.h @@ -1,7 +1,7 @@ // Definition of input features and network structure used in NNUE evaluation function -#ifndef HALFKP_384X2_32_32_H -#define HALFKP_384X2_32_32_H +#ifndef NNUE_HALFKP_384X2_32_32_H_INCLUDED +#define NNUE_HALFKP_384X2_32_32_H_INCLUDED #include "../features/feature_set.h" #include "../features/half_kp.h" @@ -10,9 +10,7 @@ #include "../layers/affine_transform.h" #include "../layers/clipped_relu.h" -namespace Eval { - -namespace NNUE { +namespace Eval::NNUE { // Input features used in evaluation function using RawFeatures = Features::FeatureSet< @@ -33,7 +31,6 @@ using OutputLayer = AffineTransform; using Network = Layers::OutputLayer; -} // namespace NNUE +} // namespace Eval::NNUE -} // namespace Eval -#endif // HALFKP_384X2_32_32_H +#endif // #ifndef NNUE_HALFKP_384X2_32_32_H_INCLUDED diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp new file mode 100644 index 00000000..61c7d444 --- /dev/null +++ 
b/src/nnue/evaluate_nnue.cpp @@ -0,0 +1,168 @@ +// Code for calculating NNUE evaluation function + +#include +#include +#include + +#include "../evaluate.h" +#include "../position.h" +#include "../misc.h" +#include "../uci.h" + +#include "evaluate_nnue.h" + +ExtPieceSquare kpp_board_index[PIECE_NB] = { + // convention: W - us, B - them + // viewed from other side, W and B are reversed + { PS_NONE, PS_NONE }, + { PS_W_PAWN, PS_B_PAWN }, + { PS_W_KNIGHT, PS_B_KNIGHT }, + { PS_W_BISHOP, PS_B_BISHOP }, + { PS_W_ROOK, PS_B_ROOK }, + { PS_W_QUEEN, PS_B_QUEEN }, + { PS_W_KING, PS_B_KING }, + { PS_NONE, PS_NONE }, + { PS_NONE, PS_NONE }, + { PS_B_PAWN, PS_W_PAWN }, + { PS_B_KNIGHT, PS_W_KNIGHT }, + { PS_B_BISHOP, PS_W_BISHOP }, + { PS_B_ROOK, PS_W_ROOK }, + { PS_B_QUEEN, PS_W_QUEEN }, + { PS_B_KING, PS_W_KING }, + { PS_NONE, PS_NONE } +}; + + +namespace Eval::NNUE { + + // Input feature converter + AlignedPtr feature_transformer; + + // Evaluation function + AlignedPtr network; + + // Evaluation function file name + std::string fileName = "nn.bin"; + + // Get a string that represents the structure of the evaluation function + std::string GetArchitectureString() { + + return "Features=" + FeatureTransformer::GetStructureString() + + ",Network=" + Network::GetStructureString(); + } + + namespace Detail { + + // Initialize the evaluation function parameters + template + void Initialize(AlignedPtr& pointer) { + + pointer.reset(reinterpret_cast(aligned_malloc(sizeof(T), alignof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); + } + + // read evaluation function parameters + template + bool ReadParameters(std::istream& stream, const AlignedPtr& pointer) { + + std::uint32_t header; + stream.read(reinterpret_cast(&header), sizeof(header)); + if (!stream || header != T::GetHashValue()) return false; + return pointer->ReadParameters(stream); + } + + } // namespace Detail + + // Initialize the evaluation function parameters + void Initialize() { + + 
Detail::Initialize(feature_transformer); + Detail::Initialize(network); + } + + // read the header + bool ReadHeader(std::istream& stream, + std::uint32_t* hash_value, std::string* architecture) { + + std::uint32_t version, size; + stream.read(reinterpret_cast(&version), sizeof(version)); + stream.read(reinterpret_cast(hash_value), sizeof(*hash_value)); + stream.read(reinterpret_cast(&size), sizeof(size)); + if (!stream || version != kVersion) return false; + architecture->resize(size); + stream.read(&(*architecture)[0], size); + return !stream.fail(); + } + + // read evaluation function parameters + bool ReadParameters(std::istream& stream) { + + std::uint32_t hash_value; + std::string architecture; + if (!ReadHeader(stream, &hash_value, &architecture)) return false; + if (hash_value != kHashValue) return false; + if (!Detail::ReadParameters(stream, feature_transformer)) return false; + if (!Detail::ReadParameters(stream, network)) return false; + return stream && stream.peek() == std::ios::traits_type::eof(); + } + + // proceed if you can calculate the difference + static void UpdateAccumulatorIfPossible(const Position& pos) { + + feature_transformer->UpdateAccumulatorIfPossible(pos); + } + + // Calculate the evaluation value + static Value ComputeScore(const Position& pos, bool refresh) { + + auto& accumulator = pos.state()->accumulator; + if (!refresh && accumulator.computed_score) { + return accumulator.score; + } + + alignas(kCacheLineSize) TransformedFeatureType + transformed_features[FeatureTransformer::kBufferSize]; + feature_transformer->Transform(pos, transformed_features, refresh); + alignas(kCacheLineSize) char buffer[Network::kBufferSize]; + const auto output = network->Propagate(transformed_features, buffer); + + auto score = static_cast(output[0] / FV_SCALE); + + accumulator.score = score; + accumulator.computed_score = true; + return accumulator.score; + } + + // read the evaluation function file + // Save and restore Options with bench command 
etc., so EvalFile is changed at this time, + // This function may be called twice to flag that the evaluation function needs to be reloaded. + void load_eval(const std::string& evalFile) { + + Initialize(); + fileName = evalFile; + + std::ifstream stream(evalFile, std::ios::binary); + const bool result = ReadParameters(stream); + + if (!result) + std::cout << "Error! " << fileName << " not found or wrong format" << std::endl; + else + std::cout << "info string NNUE " << fileName << " found & loaded" << std::endl; + } + + // Evaluation function. Perform differential calculation. + Value evaluate(const Position& pos) { + return ComputeScore(pos, false); + } + + // Evaluation function. Perform full calculation. + Value compute_eval(const Position& pos) { + return ComputeScore(pos, true); + } + + // proceed if you can calculate the difference + void update_eval(const Position& pos) { + UpdateAccumulatorIfPossible(pos); + } + +} // namespace Eval::NNUE diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h new file mode 100644 index 00000000..f35779e2 --- /dev/null +++ b/src/nnue/evaluate_nnue.h @@ -0,0 +1,50 @@ +// header used in NNUE evaluation function + +#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED +#define NNUE_EVALUATE_NNUE_H_INCLUDED + +#include "nnue_feature_transformer.h" +#include "nnue_architecture.h" + +#include + +namespace Eval::NNUE { + + // hash value of evaluation function structure + constexpr std::uint32_t kHashValue = + FeatureTransformer::GetHashValue() ^ Network::GetHashValue(); + + // Deleter for automating release of memory area + template + struct AlignedDeleter { + void operator()(T* ptr) const { + ptr->~T(); + aligned_free(ptr); + } + }; + + template + using AlignedPtr = std::unique_ptr>; + + // Input feature converter + extern AlignedPtr feature_transformer; + + // Evaluation function + extern AlignedPtr network; + + // Evaluation function file name + extern std::string fileName; + + // Get a string that represents the structure of the 
evaluation function + std::string GetArchitectureString(); + + // read the header + bool ReadHeader(std::istream& stream, + std::uint32_t* hash_value, std::string* architecture); + + // read evaluation function parameters + bool ReadParameters(std::istream& stream); + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h new file mode 100644 index 00000000..bd960929 --- /dev/null +++ b/src/nnue/features/feature_set.h @@ -0,0 +1,251 @@ +// A class template that represents the input feature set of the NNUE evaluation function + +#ifndef NNUE_FEATURE_SET_H_INCLUDED +#define NNUE_FEATURE_SET_H_INCLUDED + +#include "features_common.h" +#include + +namespace Eval::NNUE::Features { + + // A class template that represents a list of values + template + struct CompileTimeList; + + template + struct CompileTimeList { + static constexpr bool Contains(T value) { + return value == First || CompileTimeList::Contains(value); + } + static constexpr std::array + kValues = {{First, Remaining...}}; + }; + + template + constexpr std::array + CompileTimeList::kValues; + + template + struct CompileTimeList { + static constexpr bool Contains(T /*value*/) { + return false; + } + static constexpr std::array kValues = {{}}; + }; + + // Class template that adds to the beginning of the list + template + struct AppendToList; + + template + struct AppendToList, AnotherValue> { + using Result = CompileTimeList; + }; + + // Class template for adding to a sorted, unique list + template + struct InsertToSet; + + template + struct InsertToSet, AnotherValue> { + using Result = std::conditional_t< + CompileTimeList::Contains(AnotherValue), + CompileTimeList, + std::conditional_t<(AnotherValue , + typename AppendToList, AnotherValue>::Result, + First>::Result>>; + }; + + template + struct InsertToSet, Value> { + using Result = CompileTimeList; + }; + + // Base class of feature set + template + class 
FeatureSetBase { + + public: + // Get a list of indices with a value of 1 among the features + template + static void AppendActiveIndices( + const Position& pos, TriggerEvent trigger, IndexListType active[2]) { + + for (Color perspective : { WHITE, BLACK }) { + Derived::CollectActiveIndices( + pos, trigger, perspective, &active[perspective]); + } + } + + // Get a list of indices whose values have changed from the previous one in the feature quantity + template + static void AppendChangedIndices( + const PositionType& pos, TriggerEvent trigger, + IndexListType removed[2], IndexListType added[2], bool reset[2]) { + + const auto& dp = pos.state()->dirtyPiece; + if (dp.dirty_num == 0) return; + + for (Color perspective : { WHITE, BLACK }) { + reset[perspective] = false; + switch (trigger) { + case TriggerEvent::kNone: + break; + case TriggerEvent::kFriendKingMoved: + reset[perspective] = + dp.pieceId[0] == PIECE_ID_KING + perspective; + break; + case TriggerEvent::kEnemyKingMoved: + reset[perspective] = + dp.pieceId[0] == PIECE_ID_KING + ~perspective; + break; + case TriggerEvent::kAnyKingMoved: + reset[perspective] = dp.pieceId[0] >= PIECE_ID_KING; + break; + case TriggerEvent::kAnyPieceMoved: + reset[perspective] = true; + break; + default: + assert(false); + break; + } + if (reset[perspective]) { + Derived::CollectActiveIndices( + pos, trigger, perspective, &added[perspective]); + } else { + Derived::CollectChangedIndices( + pos, trigger, perspective, + &removed[perspective], &added[perspective]); + } + } + } + }; + + // Class template that represents the feature set + // do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime + template + class FeatureSet : + public FeatureSetBase< + FeatureSet> { + + private: + using Head = FirstFeatureType; + using Tail = FeatureSet; + + public: + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t kHashValue =
Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31); + // number of feature dimensions + static constexpr IndexType kDimensions = + Head::kDimensions + Tail::kDimensions; + // The maximum value of the number of indexes whose value is 1 at the same time among the feature values + static constexpr IndexType kMaxActiveDimensions = + Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions; + // List of timings to perform all calculations instead of difference calculation + using SortedTriggerSet = typename InsertToSet::Result; + static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues; + + // Get the feature quantity name + static std::string GetName() { + return std::string(Head::kName) + "+" + Tail::GetName(); + } + + private: + // Get a list of indices with a value of 1 among the features + template + static void CollectActiveIndices( + const Position& pos, const TriggerEvent trigger, const Color perspective, + IndexListType* const active) { + + Tail::CollectActiveIndices(pos, trigger, perspective, active); + if (Head::kRefreshTrigger == trigger) { + const auto start = active->size(); + Head::AppendActiveIndices(pos, perspective, active); + for (auto i = start; i < active->size(); ++i) { + (*active)[i] += Tail::kDimensions; + } + } + } + + // Get a list of indices whose values ​​have changed from the previous one in the feature quantity + template + static void CollectChangedIndices( + const Position& pos, const TriggerEvent trigger, const Color perspective, + IndexListType* const removed, IndexListType* const added) { + + Tail::CollectChangedIndices(pos, trigger, perspective, removed, added); + if (Head::kRefreshTrigger == trigger) { + const auto start_removed = removed->size(); + const auto start_added = added->size(); + Head::AppendChangedIndices(pos, perspective, removed, added); + for (auto i = start_removed; i < removed->size(); ++i) { + (*removed)[i] += Tail::kDimensions; + } + for (auto i = start_added; i < added->size(); ++i) { + 
(*added)[i] += Tail::kDimensions; + } + } + } + + // Make the base class and the class template that recursively uses itself a friend + friend class FeatureSetBase; + template + friend class FeatureSet; + }; + + // Class template that represents the feature set + // Specialization with one template argument + template + class FeatureSet : public FeatureSetBase> { + + public: + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t kHashValue = FeatureType::kHashValue; + // number of feature dimensions + static constexpr IndexType kDimensions = FeatureType::kDimensions; + // The maximum value of the number of indexes whose value is 1 at the same time among the feature values + static constexpr IndexType kMaxActiveDimensions = + FeatureType::kMaxActiveDimensions; + // List of timings to perform all calculations instead of difference calculation + using SortedTriggerSet = + CompileTimeList; + static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues; + + // Get the feature quantity name + static std::string GetName() { + return FeatureType::kName; + } + + private: + // Get a list of indices with a value of 1 among the features + static void CollectActiveIndices( + const Position& pos, const TriggerEvent trigger, const Color perspective, + IndexList* const active) { + if (FeatureType::kRefreshTrigger == trigger) { + FeatureType::AppendActiveIndices(pos, perspective, active); + } + } + + // Get a list of indices whose values ​​have changed from the previous one in the feature quantity + static void CollectChangedIndices( + const Position& pos, const TriggerEvent trigger, const Color perspective, + IndexList* const removed, IndexList* const added) { + + if (FeatureType::kRefreshTrigger == trigger) { + FeatureType::AppendChangedIndices(pos, perspective, removed, added); + } + } + + // Make the base class and the class template that recursively uses itself a friend + friend class FeatureSetBase; + template + friend class 
FeatureSet; + }; + +} // namespace Eval::NNUE::Features + +#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED diff --git a/src/nnue/features/features_common.h b/src/nnue/features/features_common.h new file mode 100644 index 00000000..8d2c54e0 --- /dev/null +++ b/src/nnue/features/features_common.h @@ -0,0 +1,37 @@ +//Common header of input features of NNUE evaluation function + +#ifndef NNUE_FEATURES_COMMON_H_INCLUDED +#define NNUE_FEATURES_COMMON_H_INCLUDED + +#include "../../evaluate.h" +#include "../nnue_common.h" + +namespace Eval::NNUE::Features { + + // Index list type + class IndexList; + + // Class template that represents the feature set + template + class FeatureSet; + + // Type of timing to perform all calculations instead of difference calculation + enum class TriggerEvent { + + kNone, // Calculate the difference whenever possible + kFriendKingMoved, // calculate all when own king moves + kEnemyKingMoved, // do all calculations when enemy king moves + kAnyKingMoved, // do all calculations if either king moves + kAnyPieceMoved, // always do all calculations + }; + + // turn side or other side + enum class Side { + + kFriend, // turn side + kEnemy, // opponent + }; + +} // namespace Eval::NNUE::Features + +#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp new file mode 100644 index 00000000..382f0ba0 --- /dev/null +++ b/src/nnue/features/half_kp.cpp @@ -0,0 +1,75 @@ +//Definition of input features HalfKP of NNUE evaluation function + +#include "half_kp.h" +#include "index_list.h" + +namespace Eval::NNUE::Features { + + // Find the index of the feature quantity from the king position and PieceSquare + template + inline IndexType HalfKP::MakeIndex(Square sq_k, PieceSquare p) { + return static_cast(PS_END) * static_cast(sq_k) + p; + } + + // Get the piece information + template + inline void HalfKP::GetPieces( + const Position& pos, Color perspective, + PieceSquare** pieces, Square* 
sq_target_k) { + + *pieces = (perspective == BLACK) ? + pos.eval_list()->piece_list_fb() : + pos.eval_list()->piece_list_fw(); + const PieceId target = (AssociatedKing == Side::kFriend) ? + static_cast(PIECE_ID_KING + perspective) : + static_cast(PIECE_ID_KING + ~perspective); + *sq_target_k = static_cast(((*pieces)[target] - PS_W_KING) % SQUARE_NB); + } + + // Get a list of indices with a value of 1 among the features + template + void HalfKP::AppendActiveIndices( + const Position& pos, Color perspective, IndexList* active) { + + // do nothing if array size is small to avoid compiler warning + if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; + + PieceSquare* pieces; + Square sq_target_k; + GetPieces(pos, perspective, &pieces, &sq_target_k); + for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) { + if (pieces[i] != PS_NONE) { + active->push_back(MakeIndex(sq_target_k, pieces[i])); + } + } + } + + // Get a list of indices whose values have changed from the previous one in the feature quantity + template + void HalfKP::AppendChangedIndices( + const Position& pos, Color perspective, + IndexList* removed, IndexList* added) { + + PieceSquare* pieces; + Square sq_target_k; + GetPieces(pos, perspective, &pieces, &sq_target_k); + const auto& dp = pos.state()->dirtyPiece; + for (int i = 0; i < dp.dirty_num; ++i) { + if (dp.pieceId[i] >= PIECE_ID_KING) continue; + const auto old_p = static_cast( + dp.old_piece[i].from[perspective]); + if (old_p != PS_NONE) { + removed->push_back(MakeIndex(sq_target_k, old_p)); + } + const auto new_p = static_cast( + dp.new_piece[i].from[perspective]); + if (new_p != PS_NONE) { + added->push_back(MakeIndex(sq_target_k, new_p)); + } + } + } + + template class HalfKP; + template class HalfKP; + +} // namespace Eval::NNUE::Features diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_kp.h new file mode 100644 index 00000000..0e7670bf --- /dev/null +++ b/src/nnue/features/half_kp.h @@ -0,0 +1,52 @@
+//Definition of input features HalfKP of NNUE evaluation function + +#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED +#define NNUE_FEATURES_HALF_KP_H_INCLUDED + +#include "../../evaluate.h" +#include "features_common.h" + +namespace Eval::NNUE::Features { + + // Feature HalfKP: Combination of the position of own king or enemy king + // and the position of pieces other than kings + template + class HalfKP { + + public: + // feature quantity name + static constexpr const char* kName = + (AssociatedKing == Side::kFriend) ? "HalfKP(Friend)" : "HalfKP(Enemy)"; + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t kHashValue = + 0x5D69D5B9u ^ (AssociatedKing == Side::kFriend); + // number of feature dimensions + static constexpr IndexType kDimensions = + static_cast(SQUARE_NB) * static_cast(PS_END); + // The maximum value of the number of indexes whose value is 1 at the same time among the feature values + static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING; + // Timing of full calculation instead of difference calculation + static constexpr TriggerEvent kRefreshTrigger = + (AssociatedKing == Side::kFriend) ? 
+ TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved; + + // Get a list of indices with a value of 1 among the features + static void AppendActiveIndices(const Position& pos, Color perspective, + IndexList* active); + + // Get a list of indices whose values ​​have changed from the previous one in the feature quantity + static void AppendChangedIndices(const Position& pos, Color perspective, + IndexList* removed, IndexList* added); + + // Find the index of the feature quantity from the king position and PieceSquare + static IndexType MakeIndex(Square sq_k, PieceSquare p); + + private: + // Get the piece information + static void GetPieces(const Position& pos, Color perspective, + PieceSquare** pieces, Square* sq_target_k); + }; + +} // namespace Eval::NNUE::Features + +#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED diff --git a/src/nnue/features/index_list.h b/src/nnue/features/index_list.h new file mode 100644 index 00000000..57ce690d --- /dev/null +++ b/src/nnue/features/index_list.h @@ -0,0 +1,46 @@ +// Definition of index list of input features + +#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED +#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED + +#include "../../position.h" +#include "../nnue_architecture.h" + +namespace Eval::NNUE::Features { + + // Class template used for feature index list + template + class ValueList { + + public: + std::size_t size() const { return size_; } + void resize(std::size_t size) { size_ = size; } + void push_back(const T& value) { values_[size_++] = value; } + T& operator[](std::size_t index) { return values_[index]; } + T* begin() { return values_; } + T* end() { return values_ + size_; } + const T& operator[](std::size_t index) const { return values_[index]; } + const T* begin() const { return values_; } + const T* end() const { return values_ + size_; } + + void swap(ValueList& other) { + const std::size_t max_size = std::max(size_, other.size_); + for (std::size_t i = 0; i < max_size; ++i) { + std::swap(values_[i], 
other.values_[i]); + } + std::swap(size_, other.size_); + } + + private: + T values_[MaxSize]; + std::size_t size_ = 0; + }; + + // Type of feature index list + class IndexList + : public ValueList { + }; + +} // namespace Eval::NNUE::Features + +#endif // #ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h new file mode 100644 index 00000000..e3773b26 --- /dev/null +++ b/src/nnue/layers/affine_transform.h @@ -0,0 +1,167 @@ +// Definition of layer AffineTransform of NNUE evaluation function + +#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED +#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED + +#include +#include "../nnue_common.h" + +namespace Eval::NNUE::Layers { + + // affine transformation layer + template + class AffineTransform { + public: + // Input/output type + using InputType = typename PreviousLayer::OutputType; + using OutputType = std::int32_t; + static_assert(std::is_same::value, ""); + + // number of input/output dimensions + static constexpr IndexType kInputDimensions = + PreviousLayer::kOutputDimensions; + static constexpr IndexType kOutputDimensions = OutputDimensions; + static constexpr IndexType kPaddedInputDimensions = + CeilToMultiple(kInputDimensions, kMaxSimdWidth); + + // Size of forward propagation buffer used in this layer + static constexpr std::size_t kSelfBufferSize = + CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); + + // Size of the forward propagation buffer used from the input layer to this layer + static constexpr std::size_t kBufferSize = + PreviousLayer::kBufferSize + kSelfBufferSize; + + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t GetHashValue() { + std::uint32_t hash_value = 0xCC03DAE4u; + hash_value += kOutputDimensions; + hash_value ^= PreviousLayer::GetHashValue() >> 1; + hash_value ^= PreviousLayer::GetHashValue() << 31; + return hash_value; + } + + // A string that represents the structure
from the input layer to this layer + static std::string GetStructureString() { + return "AffineTransform[" + + std::to_string(kOutputDimensions) + "<-" + + std::to_string(kInputDimensions) + "](" + + PreviousLayer::GetStructureString() + ")"; + } + + // read parameters + bool ReadParameters(std::istream& stream) { + if (!previous_layer_.ReadParameters(stream)) return false; + stream.read(reinterpret_cast(biases_), + kOutputDimensions * sizeof(BiasType)); + stream.read(reinterpret_cast(weights_), + kOutputDimensions * kPaddedInputDimensions * + sizeof(WeightType)); + return !stream.fail(); + } + + // forward propagation + const OutputType* Propagate( + const TransformedFeatureType* transformed_features, char* buffer) const { + const auto input = previous_layer_.Propagate( + transformed_features, buffer + kSelfBufferSize); + const auto output = reinterpret_cast(buffer); + + #if defined(USE_AVX2) + constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + const __m256i kOnes = _mm256_set1_epi16(1); + const auto input_vector = reinterpret_cast(input); + + #elif defined(USE_SSSE3) + constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + const __m128i kOnes = _mm_set1_epi16(1); + const auto input_vector = reinterpret_cast(input); + + #elif defined(IS_ARM) + constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + const auto input_vector = reinterpret_cast(input); + #endif + + for (IndexType i = 0; i < kOutputDimensions; ++i) { + const IndexType offset = i * kPaddedInputDimensions; + + #if defined(USE_AVX2) + __m256i sum = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, biases_[i]); + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m256i product = _mm256_maddubs_epi16( + + #if defined(__MINGW32__) || defined(__MINGW64__) + // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. 
Because the binary + // compiled with g++ in MSYS2 crashes here because the output memory is not aligned + // even though alignas is specified. + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&input_vector[j]), _mm256_load_si256(&row[j])); + product = _mm256_madd_epi16(product, kOnes); + sum = _mm256_add_epi32(sum, product); + } + sum = _mm256_hadd_epi32(sum, sum); + sum = _mm256_hadd_epi32(sum, sum); + const __m128i lo = _mm256_extracti128_si256(sum, 0); + const __m128i hi = _mm256_extracti128_si256(sum, 1); + output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi); + + #elif defined(USE_SSSE3) + __m128i sum = _mm_cvtsi32_si128(biases_[i]); + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m128i product = _mm_maddubs_epi16( + _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); + product = _mm_madd_epi16(product, kOnes); + sum = _mm_add_epi32(sum, product); + } + sum = _mm_hadd_epi32(sum, sum); + sum = _mm_hadd_epi32(sum, sum); + output[i] = _mm_cvtsi128_si32(sum); + + #elif defined(IS_ARM) + int32x4_t sum = {biases_[i]}; + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]); + product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]); + sum = vpadalq_s16(sum, product); + } + output[i] = sum[0] + sum[1] + sum[2] + sum[3]; + + #else + OutputType sum = biases_[i]; + for (IndexType j = 0; j < kInputDimensions; ++j) { + sum += weights_[offset + j] * input[j]; + } + output[i] = sum; + #endif + + } + return output; + } + + private: + // parameter type + using BiasType = OutputType; + using WeightType = std::int8_t; + + // Make the learning class a friend + friend class Trainer; + + // the layer immediately before this layer + PreviousLayer previous_layer_; + + // parameter + alignas(kCacheLineSize) BiasType biases_[kOutputDimensions]; + alignas(kCacheLineSize) + 
WeightType weights_[kOutputDimensions * kPaddedInputDimensions]; + }; + +} // namespace Eval::NNUE::Layers + +#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h new file mode 100644 index 00000000..46df380d --- /dev/null +++ b/src/nnue/layers/clipped_relu.h @@ -0,0 +1,178 @@ +// Definition of layer ClippedReLU of NNUE evaluation function + +#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED +#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED + +#include "../nnue_common.h" + +namespace Eval::NNUE::Layers { + + // Clipped ReLU + template + class ClippedReLU { + public: + // Input/output type + using InputType = typename PreviousLayer::OutputType; + using OutputType = std::uint8_t; + static_assert(std::is_same::value, ""); + + // number of input/output dimensions + static constexpr IndexType kInputDimensions = + PreviousLayer::kOutputDimensions; + static constexpr IndexType kOutputDimensions = kInputDimensions; + + // Size of forward propagation buffer used in this layer + static constexpr std::size_t kSelfBufferSize = + CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); + + // Size of the forward propagation buffer used from the input layer to this layer + static constexpr std::size_t kBufferSize = + PreviousLayer::kBufferSize + kSelfBufferSize; + + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t GetHashValue() { + std::uint32_t hash_value = 0x538D24C7u; + hash_value += PreviousLayer::GetHashValue(); + return hash_value; + } + + // A string that represents the structure from the input layer to this layer + static std::string GetStructureString() { + return "ClippedReLU[" + + std::to_string(kOutputDimensions) + "](" + + PreviousLayer::GetStructureString() + ")"; + } + + // read parameters + bool ReadParameters(std::istream& stream) { + return previous_layer_.ReadParameters(stream); + } + + // forward propagation + const OutputType* 
Propagate( + const TransformedFeatureType* transformed_features, char* buffer) const { + const auto input = previous_layer_.Propagate( + transformed_features, buffer + kSelfBufferSize); + const auto output = reinterpret_cast(buffer); + + #if defined(USE_AVX2) + constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; + const __m256i kZero = _mm256_setzero_si256(); + const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m256i*>(output); + for (IndexType i = 0; i < kNumChunks; ++i) { + const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( + + #if defined(__MINGW32__) || defined(__MINGW64__) + // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary + // compiled with g++ in MSYS2 crashes here because the output memory is not aligned + // even though alignas is specified. + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 0]), + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 1])), kWeightScaleBits); + const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 2]), + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 3])), kWeightScaleBits); + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_storeu_si256 + #else + _mm256_store_si256 + #endif + + (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( + _mm256_packs_epi16(words0, words1), kZero), kOffsets)); + } + constexpr IndexType kStart = kNumChunks * kSimdWidth; + + #elif defined(USE_SSSE3) + constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; + const __m128i kZero = _mm_setzero_si128(); + + #ifndef USE_SSE41 + const __m128i k0x80s = _mm_set1_epi8(-128); + 
#endif + + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < kNumChunks; ++i) { + const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 0]), + _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits); + const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 2]), + _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], + + #ifdef USE_SSE41 + _mm_max_epi8(packedbytes, kZero) + #else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + #endif + + ); + } + constexpr IndexType kStart = kNumChunks * kSimdWidth; + + #elif defined(IS_ARM) + constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2); + const int8x8_t kZero = {0}; + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast(output); + for (IndexType i = 0; i < kNumChunks; ++i) { + int16x8_t shifted; + const auto pack = reinterpret_cast(&shifted); + pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits); + pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits); + out[i] = vmax_s8(vqmovn_s16(shifted), kZero); + } + constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2); + #else + constexpr IndexType kStart = 0; + #endif + + for (IndexType i = kStart; i < kInputDimensions; ++i) { + output[i] = static_cast( + std::max(0, std::min(127, input[i] >> kWeightScaleBits))); + } + return output; + } + + private: + // Make the learning class a friend + friend class Trainer; + + // the layer immediately before this layer + PreviousLayer previous_layer_; + }; + +} // namespace Eval::NNUE::Layers + +#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED diff --git a/src/eval/nnue/layers/input_slice.h b/src/nnue/layers/input_slice.h similarity index 80% rename from src/eval/nnue/layers/input_slice.h rename to src/nnue/layers/input_slice.h index ec7627d2..f5d1ddbe 100644 --- 
a/src/eval/nnue/layers/input_slice.h +++ b/src/nnue/layers/input_slice.h @@ -1,17 +1,11 @@ // NNUE evaluation function layer InputSlice definition -#ifndef _NNUE_LAYERS_INPUT_SLICE_H_ -#define _NNUE_LAYERS_INPUT_SLICE_H_ - -#if defined(EVAL_NNUE) +#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED +#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED #include "../nnue_common.h" -namespace Eval { - -namespace NNUE { - -namespace Layers { +namespace Eval::NNUE::Layers { // input layer template @@ -48,11 +42,6 @@ class InputSlice { return true; } - // write parameters - bool WriteParameters(std::ostream& /*stream*/) const { - return true; - } - // forward propagation const OutputType* Propagate( const TransformedFeatureType* transformed_features, @@ -65,10 +54,4 @@ class InputSlice { } // namespace Layers -} // namespace NNUE - -} // namespace Eval - -#endif // defined(EVAL_NNUE) - -#endif +#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED diff --git a/src/nnue/layers/sum.h b/src/nnue/layers/sum.h new file mode 100644 index 00000000..34b3c401 --- /dev/null +++ b/src/nnue/layers/sum.h @@ -0,0 +1,141 @@ +// Definition of layer Sum of NNUE evaluation function + +#ifndef NNUE_LAYERS_SUM_H_INCLUDED +#define NNUE_LAYERS_SUM_H_INCLUDED + +#include "../nnue_common.h" + +namespace Eval::NNUE::Layers { + + // Layer that sums the output of multiple layers + template + class Sum : public Sum { + + private: + using Head = FirstPreviousLayer; + using Tail = Sum; + + public: + // Input/output type + using InputType = typename Head::OutputType; + using OutputType = InputType; + static_assert(std::is_same::value, ""); + + // number of input/output dimensions + static constexpr IndexType kInputDimensions = Head::kOutputDimensions; + static constexpr IndexType kOutputDimensions = kInputDimensions; + static_assert(kInputDimensions == Tail::kInputDimensions ,""); + + // Size of forward propagation buffer used in this layer + static constexpr std::size_t kSelfBufferSize = + 
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); + + // Size of the forward propagation buffer used from the input layer to this layer + static constexpr std::size_t kBufferSize = + std::max(Head::kBufferSize + kSelfBufferSize, Tail::kBufferSize); + + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t GetHashValue() { + std::uint32_t hash_value = 0xBCE400B4u; + hash_value ^= Head::GetHashValue() >> 1; + hash_value ^= Head::GetHashValue() << 31; + hash_value ^= Tail::GetHashValue() >> 2; + hash_value ^= Tail::GetHashValue() << 30; + return hash_value; + } + + // A string that represents the structure from the input layer to this layer + static std::string GetStructureString() { + return "Sum[" + + std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")"; + } + + // read parameters + bool ReadParameters(std::istream& stream) { + if (!Tail::ReadParameters(stream)) return false; + return previous_layer_.ReadParameters(stream); + } + + // forward propagation + const OutputType* Propagate( + const TransformedFeatureType* transformed_features, char* buffer) const { + Tail::Propagate(transformed_features, buffer); + const auto head_output = previous_layer_.Propagate( + transformed_features, buffer + kSelfBufferSize); + const auto output = reinterpret_cast(buffer); + for (IndexType i = 0; i ; + + // the layer immediately before this layer + FirstPreviousLayer previous_layer_; + }; + + // Layer that sums the output of multiple layers (when there is one template argument) + template + class Sum { + public: + // Input/output type + using InputType = typename PreviousLayer::OutputType; + using OutputType = InputType; + + // number of input/output dimensions + static constexpr IndexType kInputDimensions = + PreviousLayer::kOutputDimensions; + static constexpr IndexType kOutputDimensions = kInputDimensions; + + // Size of the forward propagation buffer used from the input layer to this layer + static 
constexpr std::size_t kBufferSize = PreviousLayer::kBufferSize; + + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t GetHashValue() { + std::uint32_t hash_value = 0xBCE400B4u; + hash_value ^= PreviousLayer::GetHashValue() >> 1; + hash_value ^= PreviousLayer::GetHashValue() << 31; + return hash_value; + } + + // A string that represents the structure from the input layer to this layer + static std::string GetStructureString() { + return "Sum[" + + std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")"; + } + + // read parameters + bool ReadParameters(std::istream& stream) { + return previous_layer_.ReadParameters(stream); + } + + // forward propagation + const OutputType* Propagate( + const TransformedFeatureType* transformed_features, char* buffer) const { + return previous_layer_.Propagate(transformed_features, buffer); + } + + protected: + // A string that represents the list of layers to be summed + static std::string GetSummandsString() { + return PreviousLayer::GetStructureString(); + } + + // Make the learning class a friend + friend class Trainer; + + // the layer immediately before this layer + PreviousLayer previous_layer_; + }; + +} // namespace Eval::NNUE::Layers + +#endif // #ifndef NNUE_LAYERS_SUM_H_INCLUDED diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h new file mode 100644 index 00000000..efa9389e --- /dev/null +++ b/src/nnue/nnue_accumulator.h @@ -0,0 +1,22 @@ +// Class for difference calculation of NNUE evaluation function + +#ifndef NNUE_ACCUMULATOR_H_INCLUDED +#define NNUE_ACCUMULATOR_H_INCLUDED + +#include "nnue_architecture.h" + +namespace Eval::NNUE { + + // Class that holds the result of affine transformation of input features + // Keep the evaluation value that is the final output together + struct alignas(32) Accumulator { + std::int16_t + accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; + Value score = VALUE_ZERO; + bool 
computed_accumulation = false; + bool computed_score = false; + }; + +} // namespace Eval::NNUE + +#endif // NNUE_ACCUMULATOR_H_INCLUDED diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h new file mode 100644 index 00000000..a8a7a9ec --- /dev/null +++ b/src/nnue/nnue_architecture.h @@ -0,0 +1,21 @@ +// Input features and network structure used in NNUE evaluation function + +#ifndef NNUE_ARCHITECTURE_H_INCLUDED +#define NNUE_ARCHITECTURE_H_INCLUDED + +// include a header that defines the input features and network structure +#include "architectures/halfkp_256x2-32-32.h" +//#include "architectures/halfkp_384x2-32-32.h" + +namespace Eval::NNUE { + + static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, ""); + static_assert(Network::kOutputDimensions == 1, ""); + static_assert(std::is_same::value, ""); + + // List of timings to perform all calculations instead of difference calculation + constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers; + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h new file mode 100644 index 00000000..0c7fd90d --- /dev/null +++ b/src/nnue/nnue_common.h @@ -0,0 +1,62 @@ +// Constants used in NNUE evaluation function + +#ifndef NNUE_COMMON_H_INCLUDED +#define NNUE_COMMON_H_INCLUDED + +#if defined(USE_AVX2) +#include + +#elif defined(USE_SSE41) +#include + +#elif defined(USE_SSSE3) +#include + +#elif defined(USE_SSE2) +#include +#endif + +namespace Eval::NNUE { + + // A constant that represents the version of the evaluation function file + constexpr std::uint32_t kVersion = 0x7AF32F16u; + + // Constant used in evaluation value calculation + constexpr int FV_SCALE = 16; + constexpr int kWeightScaleBits = 6; + + // Size of cache line (in bytes) + constexpr std::size_t kCacheLineSize = 64; + + // SIMD width (in bytes) + #if defined(USE_AVX2) + constexpr std::size_t kSimdWidth = 32; + + #elif defined(USE_SSE2) + 
constexpr std::size_t kSimdWidth = 16; + + #elif defined(IS_ARM) + constexpr std::size_t kSimdWidth = 16; + #endif + + constexpr std::size_t kMaxSimdWidth = 32; + + // Type of input feature after conversion + using TransformedFeatureType = std::uint8_t; + + // index type + using IndexType = std::uint32_t; + + // Forward declaration of learning class template + template + class Trainer; + + // find the smallest multiple of n and above + template + constexpr IntType CeilToMultiple(IntType n, IntType base) { + return (n + base - 1) / base * base; + } + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h new file mode 100644 index 00000000..83b5e513 --- /dev/null +++ b/src/nnue/nnue_feature_transformer.h @@ -0,0 +1,378 @@ +// A class that converts the input features of the NNUE evaluation function + +#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED +#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED + +#include "nnue_common.h" +#include "nnue_architecture.h" +#include "features/index_list.h" + +#include // std::memset() + +namespace Eval::NNUE { + + // Input feature converter + class FeatureTransformer { + + private: + // number of output dimensions for one side + static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + + public: + // output type + using OutputType = TransformedFeatureType; + + // number of input/output dimensions + static constexpr IndexType kInputDimensions = RawFeatures::kDimensions; + static constexpr IndexType kOutputDimensions = kHalfDimensions * 2; + + // size of forward propagation buffer + static constexpr std::size_t kBufferSize = + kOutputDimensions * sizeof(OutputType); + + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t GetHashValue() { + return RawFeatures::kHashValue ^ kOutputDimensions; + } + + // a string representing the structure + static std::string GetStructureString() { + return 
RawFeatures::GetName() + "[" + + std::to_string(kInputDimensions) + "->" + + std::to_string(kHalfDimensions) + "x2]"; + } + + // read parameters + bool ReadParameters(std::istream& stream) { + stream.read(reinterpret_cast(biases_), + kHalfDimensions * sizeof(BiasType)); + stream.read(reinterpret_cast(weights_), + kHalfDimensions * kInputDimensions * sizeof(WeightType)); + return !stream.fail(); + } + + // proceed with the difference calculation if possible + bool UpdateAccumulatorIfPossible(const Position& pos) const { + const auto now = pos.state(); + if (now->accumulator.computed_accumulation) { + return true; + } + const auto prev = now->previous; + if (prev && prev->accumulator.computed_accumulation) { + UpdateAccumulator(pos); + return true; + } + return false; + } + + // convert input features + void Transform(const Position& pos, OutputType* output, bool refresh) const { + if (refresh || !UpdateAccumulatorIfPossible(pos)) { + RefreshAccumulator(pos); + } + const auto& accumulation = pos.state()->accumulator.accumulation; + + #if defined(USE_AVX2) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + constexpr int kControl = 0b11011000; + const __m256i kZero = _mm256_setzero_si256(); + + #elif defined(USE_SSSE3) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + const __m128i kZero = _mm_setzero_si128(); + + #ifndef USE_SSE41 + const __m128i k0x80s = _mm_set1_epi8(-128); + #endif + + #elif defined(IS_ARM) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + const int8x8_t kZero = {0}; + #endif + + const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; + for (IndexType p = 0; p < 2; ++p) { + const IndexType offset = kHalfDimensions * p; + + #if defined(USE_AVX2) + auto out = reinterpret_cast<__m256i*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m256i sum0 = + + #if defined(__MINGW32__) || defined(__MINGW64__) + // HACK: Use _mm256_loadu_si256() instead of 
_mm256_load_si256. Because the binary + // compiled with g++ in MSYS2 crashes here because the output memory is not aligned + // even though alignas is specified. + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m256i sum1 = + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm256_add_epi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm256_add_epi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_storeu_si256 + #else + _mm256_store_si256 + #endif + + (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( + _mm256_packs_epi16(sum0, sum1), kZero), kControl)); + } + + #elif defined(USE_SSSE3) + auto out = reinterpret_cast<__m128i*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m128i sum0 = _mm_load_si128(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m128i sum1 = _mm_load_si128(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm_add_epi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm_add_epi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } + const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); + + _mm_store_si128(&out[j], + + #ifdef USE_SSE41 + _mm_max_epi8(packedbytes, kZero) + #else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + #endif + + ); + } + + #elif defined(IS_ARM) + const auto out = reinterpret_cast(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + int16x8_t sum = reinterpret_cast( + 
accumulation[perspectives[p]][0])[j]; + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum = vaddq_s16(sum, reinterpret_cast( + accumulation[perspectives[p]][i])[j]); + } + out[j] = vmax_s8(vqmovn_s16(sum), kZero); + } + + #else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum += accumulation[static_cast(perspectives[p])][i][j]; + } + output[offset + j] = static_cast( + std::max(0, std::min(127, sum))); + } + #endif + + } + } + + private: + // Calculate cumulative value without using difference calculation + void RefreshAccumulator(const Position& pos) const { + auto& accumulator = pos.state()->accumulator; + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList active_indices[2]; + RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], + active_indices); + for (Color perspective : { WHITE, BLACK }) { + if (i == 0) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else { + std::memset(accumulator.accumulation[perspective][i], 0, + kHalfDimensions * sizeof(BiasType)); + } + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + + #if defined(USE_AVX2) + auto accumulation = reinterpret_cast<__m256i*>( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j])); + #else + accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); + #endif + } + + #elif defined(USE_SSE2) + auto accumulation = reinterpret_cast<__m128i*>( + 
&accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + } + + #elif defined(IS_ARM) + auto accumulation = reinterpret_cast( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = vaddq_s16(accumulation[j], column[j]); + } + + #else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; + } + #endif + + } + } + } + + accumulator.computed_accumulation = true; + accumulator.computed_score = false; + } + + // Calculate cumulative value using difference calculation + void UpdateAccumulator(const Position& pos) const { + const auto prev_accumulator = pos.state()->previous->accumulator; + auto& accumulator = pos.state()->accumulator; + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList removed_indices[2], added_indices[2]; + bool reset[2]; + RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], + removed_indices, added_indices, reset); + for (Color perspective : { WHITE, BLACK }) { + + #if defined(USE_AVX2) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast<__m256i*>( + &accumulator.accumulation[perspective][i][0]); + + #elif defined(USE_SSE2) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast<__m128i*>( + &accumulator.accumulation[perspective][i][0]); + + #elif defined(IS_ARM) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast( + &accumulator.accumulation[perspective][i][0]); + #endif + + if 
(reset[perspective]) { + if (i == 0) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else { + std::memset(accumulator.accumulation[perspective][i], 0, + kHalfDimensions * sizeof(BiasType)); + } + } else {// Difference calculation for the feature amount changed from 1 to 0 + std::memcpy(accumulator.accumulation[perspective][i], + prev_accumulator.accumulation[perspective][i], + kHalfDimensions * sizeof(BiasType)); + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + + #if defined(USE_AVX2) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); + } + + #elif defined(USE_SSE2) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); + } + + #elif defined(IS_ARM) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = vsubq_s16(accumulation[j], column[j]); + } + + #else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + accumulator.accumulation[perspective][i][j] -= + weights_[offset + j]; + } + #endif + + } + } + {// Difference calculation for features that changed from 0 to 1 + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + + #if defined(USE_AVX2) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); + } + + #elif defined(USE_SSE2) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + } + + #elif defined(IS_ARM) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < 
kNumChunks; ++j) { + accumulation[j] = vaddq_s16(accumulation[j], column[j]); + } + + #else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + accumulator.accumulation[perspective][i][j] += + weights_[offset + j]; + } + #endif + + } + } + } + } + + accumulator.computed_accumulation = true; + accumulator.computed_score = false; + } + + // parameter type + using BiasType = std::int16_t; + using WeightType = std::int16_t; + + // Make the learning class a friend + friend class Trainer; + + // parameter + alignas(kCacheLineSize) BiasType biases_[kHalfDimensions]; + alignas(kCacheLineSize) + WeightType weights_[kHalfDimensions * kInputDimensions]; + }; + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED diff --git a/src/position.cpp b/src/position.cpp index 90677337..b6b2f691 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -80,7 +80,7 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) { { StateInfo st; Position p; - p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread()); + p.set(pos.fen(), pos.is_chess960(), pos.use_nnue(), &st, pos.this_thread()); Tablebases::ProbeState s1, s2; Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1); int dtz = Tablebases::probe_dtz(p, &s2); @@ -154,7 +154,7 @@ void Position::init() { /// This function is not very robust - make sure that input FENs are correct, /// this is assumed to be the responsibility of the GUI. -Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Thread* th) { +Position& Position::set(const string& fenStr, bool isChess960, bool useNnue, StateInfo* si, Thread* th) { /* A FEN string defines a particular position using only the ASCII character set. @@ -200,14 +200,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE); st = si; -#if defined(EVAL_NNUE) - // clear evalList. 
It is cleared when memset is cleared to zero above... - evalList.clear(); - - // In updating the PieceList, we have to set which piece is where, - // A counter of how much each piece has been used - PieceNumber next_piece_number = PIECE_NUMBER_ZERO; -#endif // defined(EVAL_NNUE) + // Each piece on board gets a unique ID used to track the piece later + PieceId piece_id, next_piece_id = PIECE_ID_ZERO; ss >> std::noskipws; @@ -225,13 +219,15 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th auto pc = Piece(idx); put_piece(pc, sq); -#if defined(EVAL_NNUE) - PieceNumber piece_no = - (idx == W_KING) ?PIECE_NUMBER_WKING : // - (idx == B_KING) ?PIECE_NUMBER_BKING : // back ball - next_piece_number++; // otherwise - evalList.put_piece(piece_no, sq, pc); // Place the pc piece in the sq box -#endif // defined(EVAL_NNUE) + if (useNnue) + { + // Kings get a fixed ID, other pieces get ID in order of placement + piece_id = + (idx == W_KING) ? PIECE_ID_WKING : + (idx == B_KING) ? 
PIECE_ID_BKING : + next_piece_id++; + evalList.put_piece(piece_id, sq, pc); + } ++sq; } @@ -299,13 +295,11 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK); chess960 = isChess960; + nnue = useNnue; thisThread = th; set_state(st); assert(pos_is_ok()); -#if defined(EVAL_NNUE) - assert(evalList.is_valid(*this)); -#endif // defined(EVAL_NNUE) return *this; } @@ -410,7 +404,7 @@ Position& Position::set(const string& code, Color c, StateInfo* si) { string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/" + sides[1] + char(8 - sides[1].length() + '0') + "/8 w - - 0 10"; - return set(fenStr, false, si, nullptr); + return set(fenStr, false, use_nnue(), si, nullptr); } @@ -727,10 +721,13 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { ++st->rule50; ++st->pliesFromNull; -#if defined(EVAL_NNUE) + // Used by NNUE st->accumulator.computed_accumulation = false; st->accumulator.computed_score = false; -#endif // defined(EVAL_NNUE) + PieceId dp0 = PIECE_ID_NONE; + PieceId dp1 = PIECE_ID_NONE; + auto& dp = st->dirtyPiece; + dp.dirty_num = 1; Color us = sideToMove; Color them = ~us; @@ -739,20 +736,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { Piece pc = piece_on(from); Piece captured = type_of(m) == ENPASSANT ? make_piece(them, PAWN) : piece_on(to); -#if defined(EVAL_NNUE) - PieceNumber piece_no0 = PIECE_NUMBER_NB; - PieceNumber piece_no1 = PIECE_NUMBER_NB; -#endif // defined(EVAL_NNUE) - assert(color_of(pc) == us); assert(captured == NO_PIECE || color_of(captured) == (type_of(m) != CASTLING ? 
them : us)); assert(type_of(captured) != KING); -#if defined(EVAL_NNUE) - auto& dp = st->dirtyPiece; - dp.dirty_num = 1; -#endif // defined(EVAL_NNUE) - if (type_of(m) == CASTLING) { assert(pc == make_piece(us, KING)); @@ -782,37 +769,23 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { assert(relative_rank(us, to) == RANK_6); assert(piece_on(to) == NO_PIECE); assert(piece_on(capsq) == make_piece(them, PAWN)); - -#if defined(EVAL_NNUE) - piece_no1 = piece_no_of(capsq); -#endif // defined(EVAL_NNUE) - - //board[capsq] = NO_PIECE; // Not done by remove_piece() -#if defined(EVAL_NNUE) - evalList.piece_no_list_board[capsq] = PIECE_NUMBER_NB; -#endif // defined(EVAL_NNUE) - } - else { -#if defined(EVAL_NNUE) - piece_no1 = piece_no_of(capsq); -#endif // defined(EVAL_NNUE) } st->pawnKey ^= Zobrist::psq[captured][capsq]; } - else { + else st->nonPawnMaterial[them] -= PieceValue[MG][captured]; -#if defined(EVAL_NNUE) - piece_no1 = piece_no_of(capsq); -#endif // defined(EVAL_NNUE) - } + if (use_nnue()) + dp1 = piece_id_on(capsq); // Update board and piece lists remove_piece(capsq); if (type_of(m) == ENPASSANT) + { board[capsq] = NO_PIECE; + } // Update material hash key and prefetch access to materialTable k ^= Zobrist::psq[captured][capsq]; @@ -822,20 +795,17 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Reset rule 50 counter st->rule50 = 0; -#if defined(EVAL_NNUE) - dp.dirty_num = 2; // 2 pieces moved - - dp.pieceNo[1] = piece_no1; - dp.changed_piece[1].old_piece = evalList.bona_piece(piece_no1); - // Do not use Eval::EvalList::put_piece() because the piece is removed - // from the game, and the corresponding elements of the piece lists - // needs to be Eval::BONA_PIECE_ZERO. - evalList.set_piece_on_board(piece_no1, Eval::BONA_PIECE_ZERO, Eval::BONA_PIECE_ZERO, capsq); - // Set PIECE_NUMBER_NB to piece_no_of_board[capsq] directly because it - // will not be overritten to pc if the move type is enpassant. 
- evalList.piece_no_list_board[capsq] = PIECE_NUMBER_NB; - dp.changed_piece[1].new_piece = evalList.bona_piece(piece_no1); -#endif // defined(EVAL_NNUE) + if (use_nnue()) + { + dp.dirty_num = 2; // 2 pieces moved + dp.pieceId[1] = dp1; + dp.old_piece[1] = evalList.piece_with_id(dp1); + // Do not use EvalList::put_piece() because the piece is removed + // from the game, and the corresponding elements of the piece lists + // needs to be PS_NONE. + evalList.put_piece(dp1, capsq, NO_PIECE); + dp.new_piece[1] = evalList.piece_with_id(dp1); + } } // Update hash key @@ -858,19 +828,16 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Move the piece. The tricky Chess960 castling is handled earlier if (type_of(m) != CASTLING) { -#if defined(EVAL_NNUE) - piece_no0 = piece_no_of(from); -#endif // defined(EVAL_NNUE) - move_piece(from, to); -#if defined(EVAL_NNUE) - dp.pieceNo[0] = piece_no0; - dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0); - evalList.piece_no_list_board[from] = PIECE_NUMBER_NB; - evalList.put_piece(piece_no0, to, pc); - dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0); -#endif // defined(EVAL_NNUE) + if (use_nnue()) + { + dp0 = piece_id_on(from); + dp.pieceId[0] = dp0; + dp.old_piece[0] = evalList.piece_with_id(dp0); + evalList.put_piece(dp0, to, pc); + dp.new_piece[0] = evalList.piece_with_id(dp0); + } } // If the moving piece is a pawn do some special extra work @@ -894,14 +861,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { remove_piece(to); put_piece(promotion, to); -#if defined(EVAL_NNUE) - piece_no0 = piece_no_of(to); - //dp.pieceNo[0] = piece_no0; - //dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0); - assert(evalList.piece_no_list_board[from] == PIECE_NUMBER_NB); - evalList.put_piece(piece_no0, to, promotion); - dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0); -#endif // defined(EVAL_NNUE) + if (use_nnue()) + { + dp0 = piece_id_on(to); + 
evalList.put_piece(dp0, to, promotion); + dp.new_piece[0] = evalList.piece_with_id(dp0); + } // Update hash keys k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to]; @@ -953,12 +918,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { } } - //std::cout << *this << std::endl; - assert(pos_is_ok()); -#if defined(EVAL_NNUE) - assert(evalList.is_valid(*this)); -#endif // defined(EVAL_NNUE) } @@ -988,11 +948,6 @@ void Position::undo_move(Move m) { remove_piece(to); pc = make_piece(us, PAWN); put_piece(pc, to); - -#if defined(EVAL_NNUE) - PieceNumber piece_no0 = st->dirtyPiece.pieceNo[0]; - evalList.put_piece(piece_no0, to, pc); -#endif // defined(EVAL_NNUE) } if (type_of(m) == CASTLING) @@ -1005,11 +960,11 @@ void Position::undo_move(Move m) { move_piece(to, from); // Put the piece back at the source square -#if defined(EVAL_NNUE) - PieceNumber piece_no0 = st->dirtyPiece.pieceNo[0]; - evalList.put_piece(piece_no0, from, pc); - evalList.piece_no_list_board[to] = PIECE_NUMBER_NB; -#endif // defined(EVAL_NNUE) + if (use_nnue()) + { + PieceId dp0 = st->dirtyPiece.pieceId[0]; + evalList.put_piece(dp0, from, pc); + } if (st->capturedPiece) { @@ -1028,12 +983,13 @@ void Position::undo_move(Move m) { put_piece(st->capturedPiece, capsq); // Restore the captured piece -#if defined(EVAL_NNUE) - PieceNumber piece_no1 = st->dirtyPiece.pieceNo[1]; - assert(evalList.bona_piece(piece_no1).fw == Eval::BONA_PIECE_ZERO); - assert(evalList.bona_piece(piece_no1).fb == Eval::BONA_PIECE_ZERO); - evalList.put_piece(piece_no1, capsq, st->capturedPiece); -#endif // defined(EVAL_NNUE) + if (use_nnue()) + { + PieceId dp1 = st->dirtyPiece.pieceId[1]; + assert(evalList.piece_with_id(dp1).fw == PS_NONE); + assert(evalList.piece_with_id(dp1).fb == PS_NONE); + evalList.put_piece(dp1, capsq, st->capturedPiece); + } } } @@ -1042,9 +998,6 @@ void Position::undo_move(Move m) { --gamePly; assert(pos_is_ok()); -#if defined(EVAL_NNUE) - assert(evalList.is_valid(*this)); -#endif // 
defined(EVAL_NNUE) } @@ -1052,32 +1005,12 @@ void Position::undo_move(Move m) { /// is a bit tricky in Chess960 where from/to squares can overlap. template void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto) { -#if defined(EVAL_NNUE) - auto& dp = st->dirtyPiece; - // Record the moved pieces in StateInfo for difference calculation. - dp.dirty_num = 2; // 2 pieces moved - - PieceNumber piece_no0; - PieceNumber piece_no1; - - if (Do) { - piece_no0 = piece_no_of(from); - piece_no1 = piece_no_of(to); - } -#endif // defined(EVAL_NNUE) bool kingSide = to > from; rfrom = to; // Castling is encoded as "king captures friendly rook" rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); -#if defined(EVAL_NNUE) - if (!Do) { - piece_no0 = piece_no_of(to); - piece_no1 = piece_no_of(rto); - } -#endif // defined(EVAL_NNUE) - // Remove both pieces first since squares could overlap in Chess960 remove_piece(Do ? from : to); remove_piece(Do ? rfrom : rto); @@ -1085,27 +1018,31 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ put_piece(make_piece(us, KING), Do ? to : from); put_piece(make_piece(us, ROOK), Do ? 
rto : rfrom); -#if defined(EVAL_NNUE) - if (Do) { - dp.pieceNo[0] = piece_no0; - dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0); - evalList.piece_no_list_board[from] = PIECE_NUMBER_NB; - evalList.put_piece(piece_no0, to, make_piece(us, KING)); - dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0); + if (use_nnue()) + { + PieceId dp0, dp1; + auto& dp = st->dirtyPiece; + dp.dirty_num = 2; // 2 pieces moved - dp.pieceNo[1] = piece_no1; - dp.changed_piece[1].old_piece = evalList.bona_piece(piece_no1); - evalList.piece_no_list_board[rfrom] = PIECE_NUMBER_NB; - evalList.put_piece(piece_no1, rto, make_piece(us, ROOK)); - dp.changed_piece[1].new_piece = evalList.bona_piece(piece_no1); + if (Do) { + dp0 = piece_id_on(from); + dp1 = piece_id_on(rfrom); + dp.pieceId[0] = dp0; + dp.old_piece[0] = evalList.piece_with_id(dp0); + evalList.put_piece(dp0, to, make_piece(us, KING)); + dp.new_piece[0] = evalList.piece_with_id(dp0); + dp.pieceId[1] = dp1; + dp.old_piece[1] = evalList.piece_with_id(dp1); + evalList.put_piece(dp1, rto, make_piece(us, ROOK)); + dp.new_piece[1] = evalList.piece_with_id(dp1); + } + else { + dp0 = piece_id_on(to); + dp1 = piece_id_on(rto); + evalList.put_piece(dp0, from, make_piece(us, KING)); + evalList.put_piece(dp1, rfrom, make_piece(us, ROOK)); + } } - else { - evalList.piece_no_list_board[to] = PIECE_NUMBER_NB; - evalList.put_piece(piece_no0, from, make_piece(us, KING)); - evalList.piece_no_list_board[rto] = PIECE_NUMBER_NB; - evalList.put_piece(piece_no1, rfrom, make_piece(us, ROOK)); - } -#endif // defined(EVAL_NNUE) } @@ -1130,9 +1067,8 @@ void Position::do_null_move(StateInfo& newSt) { st->key ^= Zobrist::side; prefetch(TT.first_entry(st->key)); -#if defined(EVAL_NNUE) - st->accumulator.computed_score = false; -#endif + if (use_nnue()) + st->accumulator.computed_score = false; ++st->rule50; st->pliesFromNull = 0; @@ -1388,7 +1324,7 @@ void Position::flip() { std::getline(ss, token); // Half and full moves f += token; - 
set(f, is_chess960(), st, this_thread()); + set(f, is_chess960(), use_nnue(), st, this_thread()); assert(pos_is_ok()); } @@ -1464,12 +1400,18 @@ bool Position::pos_is_ok() const { return true; } -#if defined(EVAL_NNUE) -PieceNumber Position::piece_no_of(Square sq) const +StateInfo* Position::state() const { + return st; +} + +const EvalList* Position::eval_list() const { + return &evalList; +} + +PieceId Position::piece_id_on(Square sq) const { assert(piece_on(sq) != NO_PIECE); - PieceNumber n = evalList.piece_no_of_board(sq); - assert(is_ok(n)); - return n; + PieceId pid = evalList.piece_id_list[sq]; + assert(is_ok(pid)); + return pid; } -#endif // defined(EVAL_NNUE) diff --git a/src/position.h b/src/position.h index 725be527..19642f6d 100644 --- a/src/position.h +++ b/src/position.h @@ -23,16 +23,14 @@ #include #include -#include #include // For std::unique_ptr #include #include "bitboard.h" #include "evaluate.h" -#include "misc.h" #include "types.h" -#include "eval/nnue/nnue_accumulator.h" +#include "nnue/nnue_accumulator.h" /// StateInfo struct stores information needed to restore a Position object to @@ -60,12 +58,9 @@ struct StateInfo { Bitboard checkSquares[PIECE_TYPE_NB]; int repetition; -#if defined(EVAL_NNUE) + // Used by NNUE Eval::NNUE::Accumulator accumulator; - - // For management of evaluation value difference calculation - Eval::DirtyPiece dirtyPiece; -#endif // defined(EVAL_NNUE) + DirtyPiece dirtyPiece; }; @@ -82,9 +77,6 @@ typedef std::unique_ptr> StateListPtr; /// traversing the search tree. 
class Thread; -// packed sfen -struct PackedSfen { uint8_t data[32]; }; - class Position { public: static void init(); @@ -94,7 +86,7 @@ public: Position& operator=(const Position&) = delete; // FEN string input/output - Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th); + Position& set(const std::string& fenStr, bool isChess960, bool useNnue, StateInfo* si, Thread* th); Position& set(const std::string& code, Color c, StateInfo* si); const std::string fen() const; @@ -165,6 +157,7 @@ public: Color side_to_move() const; int game_ply() const; bool is_chess960() const; + bool use_nnue() const; Thread* this_thread() const; bool is_draw(int ply) const; bool has_game_cycle(int ply) const; @@ -178,36 +171,9 @@ public: bool pos_is_ok() const; void flip(); -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) - // --- StateInfo - - // Returns the StateInfo corresponding to the current situation. - // For example, if state()->capturedPiece, the pieces captured in the previous phase are stored. - StateInfo* state() const { return st; } - - // Information such as where and which piece number is used for the evaluation function. - const Eval::EvalList* eval_list() const { return &evalList; } -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) - -#if defined(EVAL_LEARN) - // --sfenization helper - - // Get the packed sfen. Returns to the buffer specified in the argument. - // Do not include gamePly in pack. - void sfen_pack(PackedSfen& sfen); - - // It is slow to go through sfen, so I made a function to set packed sfen directly. - // Equivalent to pos.set(sfen_unpack(data),si,th);. - // If there is a problem with the passed phase and there is an error, non-zero is returned. - // PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument. 
- int set_from_packed_sfen(const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false); - - // Give the board, hand piece, and turn, and return the sfen. - //static std::string sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly); - - // Returns the position of the ball on the c side. - Square king_square(Color c) const { return pieceList[make_piece(c, KING)][0]; } -#endif // EVAL_LEARN + // Used by NNUE + StateInfo* state() const; + const EvalList* eval_list() const; private: // Initialization helpers (used while setting up a position) @@ -222,10 +188,8 @@ private: template void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); -#if defined(EVAL_NNUE) - // Returns the PieceNumber of the piece in the sq box on the board. - PieceNumber piece_no_of(Square sq) const; -#endif // defined(EVAL_NNUE) + // ID of a piece on a given square + PieceId piece_id_on(Square sq) const; // Data members Piece board[SQUARE_NB]; @@ -243,11 +207,10 @@ private: Thread* thisThread; StateInfo* st; bool chess960; + bool nnue; -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) - // List of pieces used in the evaluation function - Eval::EvalList evalList; -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) + // List of pieces used in NNUE evaluation function + EvalList evalList; }; namespace PSQT { @@ -413,6 +376,10 @@ inline bool Position::is_chess960() const { return chess960; } +inline bool Position::use_nnue() const { + return nnue; +} + inline bool Position::capture_or_promotion(Move m) const { assert(is_ok(m)); return type_of(m) != NORMAL ? 
type_of(m) != CASTLING : !empty(to_sq(m)); diff --git a/src/search.cpp b/src/search.cpp index 3ccc1d89..17ccab92 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -925,9 +925,12 @@ namespace { if (value >= probcutBeta) { - tte->save(posKey, value_to_tt(value, ss->ply), ttPv, - BOUND_LOWER, - depth - 3, move, ss->staticEval); + if ( !(ttHit + && tte->depth() >= depth - 3 + && ttValue != VALUE_NONE)) + tte->save(posKey, value_to_tt(value, ss->ply), ttPv, + BOUND_LOWER, + depth - 3, move, ss->staticEval); return value; } } @@ -985,7 +988,7 @@ moves_loop: // When in check, search starts from here ss->moveCount = ++moveCount; - if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000 && !Limits.silent) + if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000) sync_cout << "info depth " << depth << " currmove " << UCI::move(move, pos.is_chess960()) << " currmovenumber " << moveCount + thisThread->pvIdx << sync_endl; @@ -1553,13 +1556,7 @@ moves_loop: // When in check, search starts from here prefetch(TT.first_entry(pos.key_after(move))); // Check for legality just before making the move - if ( -#if defined(EVAL_LEARN) - // HACK: pos.piece_on(from_sq(m)) sometimes will be NO_PIECE during machine learning. - !pos.pseudo_legal(move) || -#endif // EVAL_LEARN - !pos.legal(move) - ) + if (!pos.legal(move)) { moveCount--; continue; @@ -1959,316 +1956,3 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { m.tbRank = 0; } } - -// --- expose the functions such as fixed depth search used for learning to the outside - -#if defined (EVAL_LEARN) - -namespace Learner -{ - // For learning, prepare a stub that can call search,qsearch() from one thread. - // From now on, it is better to have a Searcher and prepare a substitution table for each thread like Apery. - // It might have been good. - - // Initialization for learning. - // Called from Learner::search(),Learner::qsearch(). 
- void init_for_search(Position& pos, Stack* ss) - { - - // RootNode requires ss->ply == 0. - // Because it clears to zero, ss->ply == 0, so it's okay... - - std::memset(ss - 7, 0, 10 * sizeof(Stack)); - - // About Search::Limits - // Be careful because this member variable is global and affects other threads. - { - auto& limits = Search::Limits; - - // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done) - limits.infinite = true; - - // Since PV is an obstacle when displayed, erase it. - limits.silent = true; - - // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it. - limits.nodes = 0; - - // depth is also processed by the one passed as an argument of Learner::search(). - limits.depth = 0; - - // Set a large value to prevent the draw value from being returned due to the number of moves near the draw. - //limits.max_game_ply = 1 << 16; - - // If you do not include the ball entry rule, it will be a draw and it will be difficult to settle. - //limits.enteringKingRule = EnteringKingRule::EKR_27_POINT; - } - - // Set DrawValue - { - // Because it is not prepared for each thread - // May be overwritten by another thread. There is no help for it. - // If that happens, I think it should be 0. - //drawValueTable[REPETITION_DRAW][BLACK] = VALUE_ZERO; - //drawValueTable[REPETITION_DRAW][WHITE] = VALUE_ZERO; - } - - // Regarding this_thread. - - { - auto th = pos.this_thread(); - - th->completedDepth = 0; - th->selDepth = 0; - th->rootDepth = 0; - - // Zero initialization of the number of search nodes - th->nodes = 0; - - // Clear all history types. This initialization takes a little time, and the accuracy of the search is rather low, so the good and bad are not well understood. 
- // th->clear(); - - int ct = int(Options["Contempt"]) * PawnValueEg / 100; // From centipawns - Color us = pos.side_to_move(); - - // In analysis mode, adjust contempt in accordance with user preference - if (Limits.infinite || Options["UCI_AnalyseMode"]) - ct = Options["Analysis Contempt"] == "Off" ? 0 - : Options["Analysis Contempt"] == "Both" ? ct - : Options["Analysis Contempt"] == "White" && us == BLACK ? -ct - : Options["Analysis Contempt"] == "Black" && us == WHITE ? -ct - : ct; - - // Evaluation score is from the white point of view - th->contempt = (us == WHITE ? make_score(ct, ct / 2) - : -make_score(ct, ct / 2)); - - for (int i = 7; i > 0; i--) - (ss - i)->continuationHistory = &th->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel - - // set rootMoves - auto& rootMoves = th->rootMoves; - - rootMoves.clear(); - for (auto m: MoveList(pos)) - rootMoves.push_back(Search::RootMove(m)); - - assert(!rootMoves.empty()); - - //#if defined(USE_GLOBAL_OPTIONS) - // Since the generation of the substitution table for each search thread should be managed, - // Increase the generation of the substitution table for this thread because it is a new search. - //TT.new_search(th->thread_id()); - - // ↑ If you call new_search here, it may be a loss because you can't use the previous search result. - // Do not do this here, but caller should do TT.new_search(th->thread_id()) for each station ... - - // →Because we want to avoid reaching the same final diagram, use the substitution table commonly for all threads when generating teachers. - //#endif - } - } - - // A pair of reader and evaluation value. Returned by Learner::search(),Learner::qsearch(). - typedef std::pair > ValueAndPV; - - // Stationary search. - // - // Precondition) Search thread is set by pos.set_this_thread(Threads[thread_id]). - // Also, when Threads.stop arrives, the search is interrupted, so the PV at that time is not correct. 
- // After returning from search(), if Threads.stop == true, do not use the search result. - // Also, note that before calling, if you do not call it with Threads.stop == false, the search will be interrupted and it will return. - // - // If it is clogged, MOVE_RESIGN is returned in the PV array. - // - //Although it was possible to specify alpha and beta with arguments, this will show the result when searching in that window - // Because it writes to the substitution table, the value that can be pruned is written to that window when learning - // As it has a bad effect, I decided to stop allowing the window range to be specified. - ValueAndPV qsearch(Position& pos) - { - Stack stack[MAX_PLY + 10], * ss = stack + 7; - Move pv[MAX_PLY + 1]; - - init_for_search(pos, ss); - ss->pv = pv; // For the time being, it must be a dummy and somewhere with a buffer. - - if (pos.is_draw(0)) { - // Return draw value if draw. - return { VALUE_DRAW, {} }; - } - - // Is it stuck? - if (MoveList(pos).size() == 0) - { - // Return the mated value if checkmated. - return { mated_in(/*ss->ply*/ 0 + 1), {} }; - } - - auto bestValue = ::qsearch(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, 0); - - // Returns the PV obtained. - std::vector pvs; - for (Move* p = &ss->pv[0]; is_ok(*p); ++p) - pvs.push_back(*p); - - return ValueAndPV(bestValue, pvs); - } - - // Normal search. Depth depth (specified as an integer). - // 3 If you want a score for hand reading, - // auto v = search(pos,3); - // Do something like - // Evaluation value is obtained in v.first and PV is obtained in v.second. - // When multi pv is enabled, you can get the PV (reading line) array in pos.this_thread()->rootMoves[N].pv. - // Specify multi pv with the argument multiPV of this function. (The value of Options["MultiPV"] is ignored) - // - // Declaration win judgment is not done as root (because it is troublesome to handle), so it is not done here. - // Handle it by the caller. 
- // - // Precondition) Search thread is set by pos.set_this_thread(Threads[thread_id]). - // Also, when Threads.stop arrives, the search is interrupted, so the PV at that time is not correct. - // After returning from search(), if Threads.stop == true, do not use the search result. - // Also, note that before calling, if you do not call it with Threads.stop == false, the search will be interrupted and it will return. - - ValueAndPV search(Position& pos, int depth_, size_t multiPV /* = 1 */, uint64_t nodesLimit /* = 0 */) - { - std::vector pvs; - - Depth depth = depth_; - if (depth < 0) - return std::pair>(Eval::evaluate(pos), std::vector()); - - if (depth == 0) - return qsearch(pos); - - Stack stack[MAX_PLY + 10], * ss = stack + 7; - Move pv[MAX_PLY + 1]; - - init_for_search(pos, ss); - - ss->pv = pv; // For the time being, it must be a dummy and somewhere with a buffer. - - // Initialize the variables related to this_thread - auto th = pos.this_thread(); - auto& rootDepth = th->rootDepth; - auto& pvIdx = th->pvIdx; - auto& pvLast = th->pvLast; - auto& rootMoves = th->rootMoves; - auto& completedDepth = th->completedDepth; - auto& selDepth = th->selDepth; - - // A function to search the top N of this stage as best move - //size_t multiPV = Options["MultiPV"]; - - // Do not exceed the number of moves in this situation - multiPV = std::min(multiPV, rootMoves.size()); - - // If you do not multiply the node limit by the value of MultiPV, you will not be thinking about the same node for one candidate hand when you fix the depth and have MultiPV. - nodesLimit *= multiPV; - - Value alpha = -VALUE_INFINITE; - Value beta = VALUE_INFINITE; - Value delta = -VALUE_INFINITE; - Value bestValue = -VALUE_INFINITE; - - while ((rootDepth += 1) <= depth - // exit this loop even if the node limit is exceeded - // The number of search nodes is passed in the argument of this function. 
- && !(nodesLimit /* limited nodes */ && th->nodes.load(std::memory_order_relaxed) >= nodesLimit) - ) - { - for (RootMove& rm : rootMoves) - rm.previousScore = rm.score; - - size_t pvFirst = 0; - pvLast = 0; - - // MultiPV loop. We perform a full root search for each PV line - for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx) - { - if (pvIdx == pvLast) - { - pvFirst = pvLast; - for (pvLast++; pvLast < rootMoves.size(); pvLast++) - if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank) - break; - } - - // selDepth output with USI info for each depth and PV line - selDepth = 0; - - // Switch to aspiration search for depth 5 and above. - if (rootDepth >= 5 * 1) - { - delta = Value(20); - - Value p = rootMoves[pvIdx].previousScore; - - alpha = std::max(p - delta, -VALUE_INFINITE); - beta = std::min(p + delta, VALUE_INFINITE); - } - - // aspiration search - int failedHighCnt = 0; - while (true) - { - Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt * 1); - bestValue = ::search(pos, ss, alpha, beta, adjustedDepth, false); - - stable_sort(rootMoves.begin() + pvIdx, rootMoves.end()); - //my_stable_sort(pos.this_thread()->thread_id(),&rootMoves[0] + pvIdx, rootMoves.size() - pvIdx); - - // Expand aspiration window for fail low/high. - // However, if it is the value specified by the argument, it will be treated as fail low/high and break. 
- if (bestValue <= alpha) - { - beta = (alpha + beta) / 2; - alpha = std::max(bestValue - delta, -VALUE_INFINITE); - - failedHighCnt = 0; - //if (mainThread) - // mainThread->stopOnPonderhit = false; - - } - else if (bestValue >= beta) - { - beta = std::min(bestValue + delta, VALUE_INFINITE); - ++failedHighCnt; - } - else - break; - - delta += delta / 4 + 5; - assert(-VALUE_INFINITE <= alpha && beta <= VALUE_INFINITE); - - // runaway check - //assert(th->nodes.load(std::memory_order_relaxed) <= 1000000 ); - } - - stable_sort(rootMoves.begin(), rootMoves.begin() + pvIdx + 1); - //my_stable_sort(pos.this_thread()->thread_id() , &rootMoves[0] , pvIdx + 1); - - } // multi PV - - completedDepth = rootDepth; - } - - // Pass PV_is(ok) to eliminate this PV, there may be NULL_MOVE in the middle. - // → PV should not be NULL_MOVE because it is PV - // MOVE_WIN has never been thrust. (For now) - for (Move move : rootMoves[0].pv) - { - if (!is_ok(move)) - break; - pvs.push_back(move); - } - - //sync_cout << rootDepth << sync_endl; - - // Considering multiPV, the score of rootMoves[0] is returned as bestValue. - bestValue = rootMoves[0].score; - - return ValueAndPV(bestValue, pvs); - } - -} -#endif diff --git a/src/search.h b/src/search.h index eae1cafc..3e855c8b 100644 --- a/src/search.h +++ b/src/search.h @@ -88,7 +88,6 @@ struct LimitsType { time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0); movestogo = depth = mate = perft = infinite = 0; nodes = 0; - silent = false; } bool use_time_management() const { @@ -99,9 +98,6 @@ struct LimitsType { TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime; int movestogo, depth, mate, perft, infinite; int64_t nodes; - // Silent mode that does not output to the screen (for continuous self-play in process) - // Do not output PV at this time. 
- bool silent; }; extern LimitsType Limits; diff --git a/src/thread.cpp b/src/thread.cpp index a0ee2b25..d72bef81 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -216,7 +216,7 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0; th->rootDepth = th->completedDepth = 0; th->rootMoves = rootMoves; - th->rootPos.set(pos.fen(), pos.is_chess960(), &setupStates->back(), th); + th->rootPos.set(pos.fen(), pos.is_chess960(), pos.use_nnue(), &setupStates->back(), th); } setupStates->back() = tmp; diff --git a/src/tt.cpp b/src/tt.cpp index cfbb2ae6..34590903 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -116,9 +116,6 @@ void TranspositionTable::clear() { /// TTEntry t2 if its replace value is greater than that of t2. TTEntry* TranspositionTable::probe(const Key key, bool& found) const { -#if defined(DISABLE_TT) - return found = false, first_entry(0); -#else TTEntry* const tte = first_entry(key); const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster @@ -143,7 +140,6 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const { replace = &tte[i]; return found = false, replace; -#endif } diff --git a/src/types.h b/src/types.h index 773d9247..f8c32e9e 100644 --- a/src/types.h +++ b/src/types.h @@ -40,7 +40,6 @@ #include #include -#include #include #include #include @@ -132,8 +131,6 @@ enum Color { WHITE, BLACK, COLOR_NB = 2 }; -constexpr Color Colors[2] = { WHITE, BLACK }; - enum CastlingRights { NO_CASTLING, WHITE_OO, @@ -190,10 +187,7 @@ enum Value : int { QueenValueMg = 2538, QueenValueEg = 2682, Tempo = 28, - MidgameLimit = 15258, EndgameLimit = 3915, - -// Maximum value returned by the evaluation function (I want it to be around 2**14..) - VALUE_MAX_EVAL = 27000, + MidgameLimit = 15258, EndgameLimit = 3915 }; enum PieceType { @@ -209,6 +203,21 @@ enum Piece { PIECE_NB = 16 }; +// An ID used to track the pieces. Max. 32 pieces on board. 
+enum PieceId { + PIECE_ID_ZERO = 0, + PIECE_ID_KING = 30, + PIECE_ID_WKING = 30, + PIECE_ID_BKING = 31, + PIECE_ID_NONE = 32 +}; + +inline PieceId operator++(PieceId& d, int) { + PieceId x = d; + d = PieceId(int(d) + 1); + return x; +} + constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO, VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO }, @@ -238,8 +247,8 @@ enum Square : int { SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8, SQ_NONE, - SQUARE_ZERO = 0, SQUARE_NB = 64, - SQUARE_NB_PLUS1 = SQUARE_NB + 1, // If there are no balls, it is treated as having moved to SQUARE_NB, so it may be necessary to secure the array with SQUARE_NB+1, so this constant is used. + SQUARE_ZERO = 0, + SQUARE_NB = 64 }; enum Direction : int { @@ -262,6 +271,101 @@ enum Rank : int { RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB }; +// unique number for each piece type on each square +enum PieceSquare : uint32_t +{ + PS_NONE = 0, + PS_W_PAWN = 1, + PS_B_PAWN = 1 * SQUARE_NB + 1, + PS_W_KNIGHT = 2 * SQUARE_NB + 1, + PS_B_KNIGHT = 3 * SQUARE_NB + 1, + PS_W_BISHOP = 4 * SQUARE_NB + 1, + PS_B_BISHOP = 5 * SQUARE_NB + 1, + PS_W_ROOK = 6 * SQUARE_NB + 1, + PS_B_ROOK = 7 * SQUARE_NB + 1, + PS_W_QUEEN = 8 * SQUARE_NB + 1, + PS_B_QUEEN = 9 * SQUARE_NB + 1, + PS_W_KING = 10 * SQUARE_NB + 1, + PS_END = PS_W_KING, // pieces without kings (pawns included) + PS_B_KING = 11 * SQUARE_NB + 1, + PS_END2 = 12 * SQUARE_NB + 1 +}; + +struct ExtPieceSquare +{ + PieceSquare from[COLOR_NB]; + + ExtPieceSquare() {} + ExtPieceSquare(PieceSquare fw, PieceSquare fb) : from{fw, fb} {} +}; + +// Array for finding the PieceSquare corresponding to the piece on the board +extern ExtPieceSquare kpp_board_index[PIECE_NB]; + +// Structure holding which tracked piece (PieceId) is where (PieceSquare) +class EvalList +{ + // Return 
relative square when turning the board 180 degrees + constexpr Square rotate180(Square sq) { + return (Square)(sq ^ 63); + } + +public: + // Max. number of pieces without kings is 30 but must be a multiple of 4 in AVX2 + static const int MAX_LENGTH = 32; + + // Array that holds the piece id for the pieces on the board + PieceId piece_id_list[SQUARE_NB]; + + // List of pieces, separate from White and Black POV + PieceSquare* piece_list_fw() const { return const_cast(pieceListFw); } + PieceSquare* piece_list_fb() const { return const_cast(pieceListFb); } + + // Place the piece pc with piece_id on the square sq on the board + void put_piece(PieceId piece_id, Square sq, Piece pc) + { + assert(is_ok(piece_id)); + if (pc != NO_PIECE) + { + pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq); + pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq)); + piece_id_list[sq] = piece_id; + } + else + { + pieceListFw[piece_id] = PS_NONE; + pieceListFb[piece_id] = PS_NONE; + piece_id_list[sq] = piece_id; + } + } + + // Convert the specified piece_id piece to ExtPieceSquare type and return it + ExtPieceSquare piece_with_id(PieceId piece_id) const + { + ExtPieceSquare eps; + eps.from[WHITE] = pieceListFw[piece_id]; + eps.from[BLACK] = pieceListFb[piece_id]; + return eps; + } + +private: + PieceSquare pieceListFw[MAX_LENGTH]; + PieceSquare pieceListFb[MAX_LENGTH]; +}; + +// For differential evaluation of pieces that changed since last turn +struct DirtyPiece +{ + // Number of changed pieces + int dirty_num; + + // The ids of changed pieces, max. 2 pieces can change in one move + PieceId pieceId[2]; + + // What changed from the piece with that piece number + ExtPieceSquare old_piece[2]; + ExtPieceSquare new_piece[2]; +}; /// Score enum stores a middlegame and an endgame value in a single integer (enum). 
/// The least significant 16 bits are used to store the middlegame value and the @@ -287,10 +391,10 @@ inline Value mg_value(Score s) { } #define ENABLE_BASE_OPERATORS_ON(T) \ -constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \ -constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \ +constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \ +constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \ constexpr T operator-(T d) { return T(-int(d)); } \ -inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \ +inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \ inline T& operator-=(T& d1, int d2) { return d1 = d1 - d2; } #define ENABLE_INCR_OPERATORS_ON(T) \ @@ -309,8 +413,10 @@ inline T& operator/=(T& d, int i) { return d = T(int(d) / i); } ENABLE_FULL_OPERATORS_ON(Value) ENABLE_FULL_OPERATORS_ON(Direction) -ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(Piece) +ENABLE_INCR_OPERATORS_ON(PieceSquare) +ENABLE_INCR_OPERATORS_ON(PieceId) +ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(File) ENABLE_INCR_OPERATORS_ON(Rank) @@ -398,6 +504,10 @@ inline Color color_of(Piece pc) { return Color(pc >> 3); } +constexpr bool is_ok(PieceId pid) { + return pid < PIECE_ID_NONE; +} + constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } @@ -463,44 +573,6 @@ constexpr bool is_ok(Move m) { return from_sq(m) != to_sq(m); // Catch MOVE_NULL and MOVE_NONE } -// Return squares when turning the board 180 -constexpr Square Inv(Square sq) { return (Square)((SQUARE_NB - 1) - sq); } - -// Return squares when mirroring the board -constexpr Square Mir(Square sq) { return make_square(File(7 - (int)file_of(sq)), rank_of(sq)); } - -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) -// -------------------- -// piece box -// -------------------- - -// A number used to manage the piece list (which piece is where) used in the Position class. 
-enum PieceNumber : uint8_t -{ - PIECE_NUMBER_PAWN = 0, - PIECE_NUMBER_KNIGHT = 16, - PIECE_NUMBER_BISHOP = 20, - PIECE_NUMBER_ROOK = 24, - PIECE_NUMBER_QUEEN = 28, - PIECE_NUMBER_KING = 30, - PIECE_NUMBER_WKING = 30, - PIECE_NUMBER_BKING = 31, // Use this if you need the numbers of the first and second balls - PIECE_NUMBER_ZERO = 0, - PIECE_NUMBER_NB = 32, -}; - -inline PieceNumber& operator++(PieceNumber& d) { return d = PieceNumber(int8_t(d) + 1); } -inline PieceNumber operator++(PieceNumber& d, int) { - PieceNumber x = d; - d = PieceNumber(int8_t(d) + 1); - return x; -} -inline PieceNumber& operator--(PieceNumber& d) { return d = PieceNumber(int8_t(d) - 1); } - -// Piece Number integrity check. for assert. -constexpr bool is_ok(PieceNumber pn) { return pn < PIECE_NUMBER_NB; } -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) - /// Based on a congruential pseudo random number generator constexpr Key make_key(uint64_t seed) { return seed * 6364136223846793005ULL + 1442695040888963407ULL; diff --git a/src/uci.cpp b/src/uci.cpp index 6d86ebca..c2160773 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -34,55 +34,16 @@ #include "uci.h" #include "syzygy/tbprobe.h" -#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD) -#include "eval/nnue/nnue_test_command.h" -#endif - using namespace std; extern vector<string> setup_bench(const Position&, istream&); -// FEN string of the initial position, normal chess -const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; - -// Command to automatically generate a game record -#if defined (EVAL_LEARN) -namespace Learner -{ - // Automatic generation of teacher position - void gen_sfen(Position& pos, istringstream& is); - - // Learning from the generated game record - void learn(Position& pos, istringstream& is); - -#if defined(GENSFEN2019) - // Automatic generation command of teacher phase under development - void gen_sfen2019(Position& pos, istringstream& is); -#endif - - // A pair of reader and evaluation value.
Returned by Learner::search(),Learner::qsearch(). - typedef std::pair<Value, std::vector<Move> > ValueAndPV; - - ValueAndPV qsearch(Position& pos); - ValueAndPV search(Position& pos, int depth_, size_t multiPV = 1, uint64_t nodesLimit = 0); - -} -#endif - -#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD) -void test_cmd(Position& pos, istringstream& is) -{ - // Initialize as it may be searched. - init_nnue(); - - std::string param; - is >> param; - - if (param == "nnue") Eval::NNUE::TestCommand(pos, is); -} -#endif - namespace { + + // FEN string of the initial position, normal chess + const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; + + // position() is called when engine receives the "position" UCI command. // The function sets up the position described in the given FEN string ("fen") // or the starting position ("startpos") and then makes the moves given in the @@ -107,7 +68,7 @@ return; states = StateListPtr(new std::deque<StateInfo>(1)); // Drop old and create a new one - pos.set(fen, Options["UCI_Chess960"], &states->back(), Threads.main()); + pos.set(fen, Options["UCI_Chess960"], Options["Use NNUE"], &states->back(), Threads.main()); // Parse move list (if any) while (is >> token && (m = UCI::to_move(pos, token)) != MOVE_NONE) @@ -211,11 +172,9 @@ else if (token == "position") position(pos, is, states); else if (token == "ucinewgame") { -#if defined(EVAL_NNUE) - init_nnue(); -#endif + init_nnue(Options["EvalFile"]); Search::clear(); - elapsed = now(); // Search::clear() may take some while + elapsed = now(); // initialization may take some time } } @@ -251,86 +210,20 @@ return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); } -// When you calculate check sum, save it and check the consistency later. - uint64_t eval_sum; } // namespace -// Make is_ready_cmd() callable from outside. (Because I want to call it from the bench command etc.) -// Note that the phase is not initialized.
-void init_nnue(bool skipCorruptCheck) + +void UCI::init_nnue(const std::string& evalFile) { -#if defined(EVAL_NNUE) - // After receiving "isready", modify so that a line feed is sent every 5 seconds until "readyok" is returned. (keep alive processing) - // From USI 2.0 specifications. - // -The time out time after "is ready" is about 30 seconds. Beyond this, if you want to initialize the evaluation function and secure the hash table, - // You should send some kind of message (breakable) from the thinking engine side. - // -Shogi GUI already does so, so MyShogi will follow along. - //-Also, the engine side of Yaneura King modifies it so that after "isready" is received, a line feed is sent every 5 seconds until "readyok" is returned. - - // Perform processing that may take time, such as reading the evaluation function, at this timing. - // If you do a time-consuming process at startup, Shogi place will make a timeout judgment and retire the recognition as a thinking engine. - if (!UCI::load_eval_finished) + if (UCI::use_nnue && !UCI::load_eval_finished) { - // Read evaluation function - Eval::load_eval(); - - // Calculate and save checksum (to check for subsequent memory corruption) - eval_sum = Eval::calc_check_sum(); - - // display soft name - Eval::print_softname(eval_sum); - + // Load evaluation function from a file + Eval::NNUE::load_eval(evalFile); UCI::load_eval_finished = true; } - else - { - // Check the checksum every time to see if the memory has been corrupted. - // It seems that the time is a little wasteful, but it is good because it is about 0.1 seconds. - if (!skipCorruptCheck && eval_sum != Eval::calc_check_sum()) - sync_cout << "Error! 
: EVAL memory is corrupted" << sync_endl; - } -#endif // defined(EVAL_NNUE) } -// -------------------- -// Call qsearch(),search() directly for testing -// -------------------- - -#if defined(EVAL_LEARN) -void qsearch_cmd(Position& pos) -{ - cout << "qsearch : "; - auto pv = Learner::qsearch(pos); - cout << "Value = " << pv.first << " , " << UCI::value(pv.first) << " , PV = "; - for (auto m : pv.second) - cout << UCI::move(m, false) << " "; - cout << endl; -} - -void search_cmd(Position& pos, istringstream& is) -{ - string token; - int depth = 1; - int multi_pv = (int)Options["MultiPV"]; - while (is >> token) - { - if (token == "depth") - is >> depth; - if (token == "multipv") - is >> multi_pv; - } - - cout << "search depth = " << depth << " , multi_pv = " << multi_pv << " : "; - auto pv = Learner::search(pos, depth, multi_pv); - cout << "Value = " << pv.first << " , " << UCI::value(pv.first) << " , PV = "; - for (auto m : pv.second) - cout << UCI::move(m, false) << " "; - cout << endl; -} - -#endif - /// UCI::loop() waits for a command from stdin, parses it and calls the appropriate /// function. Also intercepts EOF from stdin to ensure gracefully exiting if the /// GUI dies unexpectedly. When called with some command line arguments, e.g. 
to @@ -343,7 +236,7 @@ void UCI::loop(int argc, char* argv[]) { string token, cmd; StateListPtr states(new std::deque<StateInfo>(1)); - pos.set(StartFEN, false, &states->back(), Threads.main()); + pos.set(StartFEN, false, pos.use_nnue(), &states->back(), Threads.main()); for (int i = 1; i < argc; ++i) cmd += std::string(argv[i]) + " "; @@ -378,12 +271,14 @@ void UCI::loop(int argc, char* argv[]) { else if (token == "position") position(pos, is, states); else if (token == "ucinewgame") { -#if defined(EVAL_NNUE) - init_nnue(); -#endif + init_nnue(Options["EvalFile"]); Search::clear(); } - else if (token == "isready") sync_cout << "readyok" << sync_endl; + else if (token == "isready") + { + init_nnue(Options["EvalFile"]); + sync_cout << "readyok" << sync_endl; + } // Additional custom non-UCI commands, mainly for debugging. // Do not use these commands during a search! @@ -392,28 +287,8 @@ void UCI::loop(int argc, char* argv[]) { else if (token == "d") sync_cout << pos << sync_endl; else if (token == "eval") sync_cout << Eval::trace(pos) << sync_endl; else if (token == "compiler") sync_cout << compiler_info() << sync_endl; -#if defined (EVAL_LEARN) - else if (token == "gensfen") Learner::gen_sfen(pos, is); - else if (token == "learn") Learner::learn(pos, is); - -#if defined (GENSFEN2019) - // Command to generate teacher phase under development - else if (token == "gensfen2019") Learner::gen_sfen2019(pos, is); -#endif - // Command to call qsearch(),search() directly for testing - else if (token == "qsearch") qsearch_cmd(pos); - else if (token == "search") search_cmd(pos, is); - -#endif - -#if defined(EVAL_NNUE) - else if (token == "eval_nnue") sync_cout << "eval_nnue = " << Eval::compute_eval(pos) << sync_endl; -#endif - -#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD) - // test command - else if (token == "test") test_cmd(pos, is); -#endif + else if (token == "evalnn") sync_cout << "NNUE evaluation: " + << Eval::NNUE::compute_eval(pos) << sync_endl; else sync_cout << "Unknown
command: " << cmd << sync_endl; diff --git a/src/uci.h b/src/uci.h index 6529f90c..34f01bac 100644 --- a/src/uci.h +++ b/src/uci.h @@ -76,19 +76,13 @@ std::string pv(const Position& pos, Depth depth, Value alpha, Value beta); std::string wdl(Value v, int ply); Move to_move(const Position& pos, std::string& str); -// Flag that read the evaluation function. This is set to false when evaldir is changed. -extern bool load_eval_finished; // = false; +void init_nnue(const std::string& evalFile); + +extern bool load_eval_finished; +extern bool use_nnue; + } // namespace UCI extern UCI::OptionsMap Options; -// Processing when USI "isready" command is called. At this time, the evaluation function is read. -// Used when you want to load the evaluation function when "isready" does not come in handler of benchmark command etc. -// If skipCorruptCheck == true, skip memory corruption check by check sum when reading the evaluation function a second time. -// * This function is inconvenient if it is not available in Stockfish, so add it. 
- -void init_nnue(bool skipCorruptCheck = false); - -extern const char* StartFEN; - #endif // #ifndef UCI_H_INCLUDED diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 8658adb4..6311cd2a 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -42,8 +42,22 @@ void on_hash_size(const Option& o) { TT.resize(size_t(o)); } void on_logger(const Option& o) { start_logger(o); } void on_threads(const Option& o) { Threads.set(size_t(o)); } void on_tb_path(const Option& o) { Tablebases::init(o); } -void on_eval_file(const Option& o) { load_eval_finished = false; init_nnue(); } +void on_use_nnue(const Option& o) { + use_nnue = o; + + if (use_nnue) + std::cout << "info string NNUE eval used" << std::endl; + else + std::cout << "info string Standard eval used" << std::endl; + + init_nnue(Options["EvalFile"]); +} + +void on_eval_file(const Option& o) { + load_eval_finished = false; + init_nnue(o); +} /// Our case insensitive less() function as required by UCI protocol bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const { @@ -80,27 +94,8 @@ void init(OptionsMap& o) { o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); - // Evaluation function file name. When this is changed, it is necessary to reread the evaluation function at the next ucinewgame timing. -#if defined(__linux__) - o["EvalFile"] << Option("eval/nn.bin", on_eval_file); -#else - o["EvalFile"] << Option("eval\\nn.bin", on_eval_file); -#endif - // When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function. - // I want to hit the test eval convert command, but there is no new evaluation function - // It ends abnormally before executing this command. - // Therefore, with this hidden option, you can suppress the loading of the evaluation function when ucinewgame, - // Hit the test eval convert command. 
- o["SkipLoadingEval"] << Option(false); - // how many moves to use a fixed move - o["BookMoves"] << Option(16, 0, 10000); - -#if defined(EVAL_LEARN) - // When learning the evaluation function, you can change the folder to save the evaluation function. - // Evalsave by default. This folder shall be prepared in advance. - // Automatically dig a folder under this folder like "0/", "1/", ... and save the evaluation function file there. - o["EvalSaveDir"] << Option("evalsave"); -#endif + o["Use NNUE"] << Option(true, on_use_nnue); + o["EvalFile"] << Option("nn.bin", on_eval_file); } @@ -209,6 +204,6 @@ Option& Option::operator=(const string& v) { return *this; } -// Flag that read the evaluation function. This is set to false when evaldir is changed. +bool use_nnue = true; bool load_eval_finished = false; } // namespace UCI